19739 lines
974 KiB
Text
19739 lines
974 KiB
Text
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_sl.py:549: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 858
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 858
|
|
ncols: 269
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 244
|
|
log10_or_mychisq 244
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 168
|
|
No. of categorical features: 7
|
|
|
|
PASS: x_features has no target variable
|
|
|
|
No. of columns for x_features: 175
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data according to scaling law: 1/np.sqrt(x_ncols)
|
|
Train data size: (414, 175)
|
|
Test data size: 0.07559289460184544 (34, 175)
|
|
y_train numbers: Counter({0: 326, 1: 88})
|
|
y_train ratio: 3.7045454545454546
|
|
|
|
y_test_numbers: Counter({0: 27, 1: 7})
|
|
y_test ratio: 3.857142857142857
|
|
-------------------------------------------------------------
|
|
|
|
Simple Random OverSampling
|
|
Counter({1: 326, 0: 326})
|
|
(652, 175)
|
|
|
|
Simple Random UnderSampling
|
|
Counter({0: 88, 1: 88})
|
|
(176, 175)
|
|
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 326, 1: 326})
|
|
(652, 175)
|
|
|
|
SMOTE_NC OverSampling
|
|
Counter({1: 326, 0: 326})
|
|
(652, 175)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis: scaling law split
|
|
Gene name: embB
|
|
Drug name: ethambutol
|
|
|
|
Output directory: /home/tanu/git/Data/ethambutol/output/ml/tts_sl/
|
|
Sanity checks:
|
|
ML source data size: (448, 175)
|
|
Total input features: (414, 175)
|
|
Target feature numbers: Counter({0: 326, 1: 88})
|
|
Target features ratio: 3.7045454545454546
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 36
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'mcsm_ppi2_affinity', 'interface_dist']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.13825727 0.08154607 0.08528066 0.09961534 0.08779621 0.08618045
|
|
0.11637926 0.07789564 0.0799129 0.07683444]
|
|
|
|
mean value: 0.09296982288360596
|
|
|
|
key: score_time
|
|
value: [0.03777742 0.02564454 0.02300692 0.02812481 0.03288937 0.0238626
|
|
0.02518201 0.02394128 0.02401376 0.04178333]
|
|
|
|
mean value: 0.02862260341644287
|
|
|
|
key: test_mcc
|
|
value: [0.54494926 0.6333005 0.6333005 0.85634884 0.52265422 0.77972283
|
|
0.6989826 0.6310315 0.57066443 0.66678841]
|
|
|
|
mean value: 0.6537743098920646
|
|
|
|
key: train_mcc
|
|
value: [0.82700789 0.82580084 0.80837281 0.79999931 0.80849337 0.79973104
|
|
0.7997743 0.80849337 0.81082175 0.79307146]
|
|
|
|
mean value: 0.8081566136778171
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.88095238 0.88095238 0.95238095 0.85365854 0.92682927
|
|
0.90243902 0.87804878 0.87804878 0.90243902]
|
|
|
|
mean value: 0.8912891986062718
|
|
|
|
key: train_accuracy
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.94354839 0.94354839 0.93817204 0.93548387 0.9383378 0.93565684
|
|
0.93565684 0.9383378 0.9383378 0.93297587]
|
|
|
|
mean value: 0.9380055637233705
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.70588235 0.70588235 0.875 0.57142857 0.82352941
|
|
0.75 0.70588235 0.54545455 0.71428571]
|
|
|
|
mean value: 0.7022345301757067
|
|
|
|
key: train_fscore
|
|
value: [0.86092715 0.85714286 0.84137931 0.83561644 0.84137931 0.83333333
|
|
0.82857143 0.84137931 0.84563758 0.82758621]
|
|
|
|
mean value: 0.8412952931545316
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.75 0.75 1. 0.8 0.875
|
|
0.85714286 0.75 1. 0.83333333]
|
|
|
|
mean value: 0.8329761904761905
|
|
|
|
key: train_precision
|
|
value: [0.90277778 0.92647059 0.92424242 0.91044776 0.92424242 0.92307692
|
|
0.95081967 0.92424242 0.91304348 0.92307692]
|
|
|
|
mean value: 0.9222440396480238
|
|
|
|
key: test_recall
|
|
value: [0.55555556 0.66666667 0.66666667 0.77777778 0.44444444 0.77777778
|
|
0.66666667 0.66666667 0.375 0.625 ]
|
|
|
|
mean value: 0.6222222222222222
|
|
|
|
key: train_recall
|
|
value: [0.82278481 0.79746835 0.7721519 0.7721519 0.7721519 0.75949367
|
|
0.73417722 0.7721519 0.7875 0.75 ]
|
|
|
|
mean value: 0.774003164556962
|
|
|
|
key: test_roc_auc
|
|
value: [0.74747475 0.8030303 0.8030303 0.88888889 0.70659722 0.87326389
|
|
0.81770833 0.80208333 0.6875 0.79734848]
|
|
|
|
mean value: 0.7926925505050505
|
|
|
|
key: train_roc_auc
|
|
value: [0.89944701 0.89020175 0.87754353 0.87583704 0.87757255 0.87124343
|
|
0.86198657 0.87757255 0.88351109 0.86646758]
|
|
|
|
mean value: 0.8781383100071152
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.54545455 0.54545455 0.77777778 0.4 0.7
|
|
0.6 0.54545455 0.375 0.55555556]
|
|
|
|
mean value: 0.5499242424242424
|
|
|
|
key: train_jcc
|
|
value: [0.75581395 0.75 0.72619048 0.71764706 0.72619048 0.71428571
|
|
0.70731707 0.72619048 0.73255814 0.70588235]
|
|
|
|
mean value: 0.7262075720815836
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.85975575 1.78022003 1.30109572 1.05650544 1.55232549 0.93596625
|
|
0.97297597 0.95046806 0.9523139 1.00821924]
|
|
|
|
mean value: 1.2369845867156983
|
|
|
|
key: score_time
|
|
value: [0.02304935 0.02481604 0.01518774 0.01267743 0.0152185 0.01517177
|
|
0.01260853 0.01517558 0.01544213 0.02388382]
|
|
|
|
mean value: 0.01732308864593506
|
|
|
|
key: test_mcc
|
|
value: [0.54494926 0.6333005 0.71717172 0.92884073 0.54237994 0.77972283
|
|
0.6140038 0.6593092 0.75691259 0.75691259]
|
|
|
|
mean value: 0.6933503152621142
|
|
|
|
key: train_mcc
|
|
value: [0.96003759 0.89382193 0.95178641 0.96785761 0.96788082 0.98394041
|
|
0.90199832 0.97605278 0.8701771 0.96817406]
|
|
|
|
mean value: 0.9441727034440436
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.88095238 0.9047619 0.97619048 0.85365854 0.92682927
|
|
0.87804878 0.87804878 0.92682927 0.92682927]
|
|
|
|
mean value: 0.9009291521486643
|
|
|
|
key: train_accuracy
|
|
value: [0.98655914 0.96505376 0.98387097 0.98924731 0.98927614 0.99463807
|
|
0.96782842 0.9919571 0.95710456 0.98927614]
|
|
|
|
mean value: 0.9814811611750123
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.70588235 0.77777778 0.94117647 0.625 0.82352941
|
|
0.66666667 0.73684211 0.76923077 0.76923077]
|
|
|
|
mean value: 0.7440336323463259
|
|
|
|
key: train_fscore
|
|
value: [0.96855346 0.91503268 0.96202532 0.97468354 0.97468354 0.98734177
|
|
0.92105263 0.98113208 0.8961039 0.975 ]
|
|
|
|
mean value: 0.955560891922779
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.75 0.77777778 1. 0.71428571 0.875
|
|
0.83333333 0.7 1. 1. ]
|
|
|
|
mean value: 0.836468253968254
|
|
|
|
key: train_precision
|
|
value: [0.9625 0.94594595 0.96202532 0.97468354 0.97468354 0.98734177
|
|
0.95890411 0.975 0.93243243 0.975 ]
|
|
|
|
mean value: 0.9648516665182609
|
|
|
|
key: test_recall
|
|
value: [0.55555556 0.66666667 0.77777778 0.88888889 0.55555556 0.77777778
|
|
0.55555556 0.77777778 0.625 0.625 ]
|
|
|
|
mean value: 0.6805555555555556
|
|
|
|
key: train_recall
|
|
value: [0.97468354 0.88607595 0.96202532 0.97468354 0.97468354 0.98734177
|
|
0.88607595 0.98734177 0.8625 0.975 ]
|
|
|
|
mean value: 0.9470411392405064
|
|
|
|
key: test_roc_auc
|
|
value: [0.74747475 0.8030303 0.85858586 0.94444444 0.74652778 0.87326389
|
|
0.76215278 0.84201389 0.8125 0.8125 ]
|
|
|
|
mean value: 0.8202493686868687
|
|
|
|
key: train_roc_auc
|
|
value: [0.98222232 0.93621204 0.9758932 0.9839288 0.98394041 0.99197021
|
|
0.93793593 0.99026953 0.92271758 0.98408703]
|
|
|
|
mean value: 0.9689177045834534
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.54545455 0.63636364 0.88888889 0.45454545 0.7
|
|
0.5 0.58333333 0.625 0.625 ]
|
|
|
|
mean value: 0.6013131313131312
|
|
|
|
key: train_jcc
|
|
value: [0.93902439 0.84337349 0.92682927 0.95061728 0.95061728 0.975
|
|
0.85365854 0.96296296 0.81176471 0.95121951]
|
|
|
|
mean value: 0.9165067438039527
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01652694 0.01299787 0.01034498 0.01023841 0.01024342 0.01018882
|
|
0.01015377 0.0114944 0.0101347 0.01052117]
|
|
|
|
mean value: 0.011284446716308594
|
|
|
|
key: score_time
|
|
value: [0.01544619 0.00955415 0.0094564 0.00937772 0.00940585 0.00933957
|
|
0.00916338 0.00932145 0.00951695 0.00933099]
|
|
|
|
mean value: 0.009991264343261719
|
|
|
|
key: test_mcc
|
|
value: [0.40344549 0.69727705 0.56883128 0.74471985 0.04600437 0.39091152
|
|
0.4768306 0.56541479 0.58026353 0.39267774]
|
|
|
|
mean value: 0.486637623356521
|
|
|
|
key: train_mcc
|
|
value: [0.69866809 0.65278249 0.66216187 0.652554 0.52389431 0.53613836
|
|
0.6098212 0.64733091 0.62649878 0.65101357]
|
|
|
|
mean value: 0.6260863587107378
|
|
|
|
key: test_accuracy
|
|
value: [0.76190476 0.88095238 0.83333333 0.9047619 0.51219512 0.70731707
|
|
0.80487805 0.82926829 0.85365854 0.73170732]
|
|
|
|
mean value: 0.7819976771196283
|
|
|
|
key: train_accuracy
|
|
value: [0.88709677 0.8655914 0.8655914 0.86290323 0.73726542 0.74798928
|
|
0.84450402 0.87935657 0.85254692 0.86595174]
|
|
|
|
mean value: 0.8408796736717692
|
|
|
|
key: test_fscore
|
|
value: [0.54545455 0.76190476 0.66666667 0.8 0.33333333 0.53846154
|
|
0.6 0.66666667 0.66666667 0.52173913]
|
|
|
|
mean value: 0.6100893309588962
|
|
|
|
key: train_fscore
|
|
value: [0.76404494 0.72826087 0.73404255 0.72727273 0.608 0.61788618
|
|
0.69473684 0.72392638 0.70899471 0.72826087]
|
|
|
|
mean value: 0.7035426073744735
|
|
|
|
key: test_precision
|
|
value: [0.46153846 0.66666667 0.58333333 0.72727273 0.23809524 0.41176471
|
|
0.54545455 0.58333333 0.6 0.4 ]
|
|
|
|
mean value: 0.5217459011576658
|
|
|
|
key: train_precision
|
|
value: [0.68686869 0.63809524 0.63302752 0.62962963 0.44444444 0.45508982
|
|
0.59459459 0.70238095 0.6146789 0.64423077]
|
|
|
|
mean value: 0.6043040557621945
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.88888889 0.77777778 0.88888889 0.55555556 0.77777778
|
|
0.66666667 0.77777778 0.75 0.75 ]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_recall
|
|
value: [0.86075949 0.84810127 0.87341772 0.86075949 0.96202532 0.96202532
|
|
0.83544304 0.74683544 0.8375 0.8375 ]
|
|
|
|
mean value: 0.8624367088607595
|
|
|
|
key: test_roc_auc
|
|
value: [0.72727273 0.88383838 0.81313131 0.8989899 0.52777778 0.73263889
|
|
0.75520833 0.81076389 0.81439394 0.73863636]
|
|
|
|
mean value: 0.7702651515151515
|
|
|
|
key: train_roc_auc
|
|
value: [0.87747872 0.85920422 0.86844948 0.86212036 0.81944803 0.82625075
|
|
0.84119091 0.83090071 0.84707765 0.85561007]
|
|
|
|
mean value: 0.8487730896350418
|
|
|
|
key: test_jcc
|
|
value: [0.375 0.61538462 0.5 0.66666667 0.2 0.36842105
|
|
0.42857143 0.5 0.5 0.35294118]
|
|
|
|
mean value: 0.4506984939724878
|
|
|
|
key: train_jcc
|
|
value: [0.61818182 0.57264957 0.57983193 0.57142857 0.43678161 0.44705882
|
|
0.53225806 0.56730769 0.54918033 0.57264957]
|
|
|
|
mean value: 0.5447327985100132
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01086617 0.01176739 0.01034856 0.01103044 0.01118231 0.01112795
|
|
0.0103054 0.01032543 0.01044059 0.0103581 ]
|
|
|
|
mean value: 0.010775232315063476
|
|
|
|
key: score_time
|
|
value: [0.01022363 0.01046634 0.00995755 0.01052999 0.00978065 0.00941157
|
|
0.00928855 0.00924921 0.00914741 0.00955892]
|
|
|
|
mean value: 0.009761381149291991
|
|
|
|
key: test_mcc
|
|
value: [ 0.07784989 0.42358687 0.15151515 0.28426762 -0.12009612 0.6989826
|
|
0.48234017 0.54237994 0.00458735 0.25295146]
|
|
|
|
mean value: 0.27983649480059836
|
|
|
|
key: train_mcc
|
|
value: [0.47627568 0.38128402 0.47252142 0.49088992 0.46564258 0.4066905
|
|
0.45377606 0.41364491 0.5140712 0.45094947]
|
|
|
|
mean value: 0.4525745767981807
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.83333333 0.71428571 0.78571429 0.73170732 0.90243902
|
|
0.82926829 0.85365854 0.73170732 0.80487805]
|
|
|
|
mean value: 0.7901277584204414
|
|
|
|
key: train_accuracy
|
|
value: [0.83870968 0.81182796 0.83870968 0.84139785 0.83646113 0.82037534
|
|
0.82841823 0.8230563 0.84718499 0.82841823]
|
|
|
|
mean value: 0.8314559370405604
|
|
|
|
key: test_fscore
|
|
value: [0.25 0.46153846 0.33333333 0.4 0. 0.75
|
|
0.58823529 0.625 0.15384615 0.33333333]
|
|
|
|
mean value: 0.3895286576168929
|
|
|
|
key: train_fscore
|
|
value: [0.56521739 0.48529412 0.55882353 0.58156028 0.55474453 0.5037037
|
|
0.55555556 0.50746269 0.6013986 0.54929577]
|
|
|
|
mean value: 0.5463056169471472
|
|
|
|
key: test_precision
|
|
value: [0.28571429 0.75 0.33333333 0.5 0. 0.85714286
|
|
0.625 0.71428571 0.2 0.5 ]
|
|
|
|
mean value: 0.47654761904761905
|
|
|
|
key: train_precision
|
|
value: [0.66101695 0.57894737 0.66666667 0.66129032 0.65517241 0.60714286
|
|
0.61538462 0.61818182 0.68253968 0.62903226]
|
|
|
|
mean value: 0.6375374951927499
|
|
|
|
key: test_recall
|
|
value: [0.22222222 0.33333333 0.33333333 0.33333333 0. 0.66666667
|
|
0.55555556 0.55555556 0.125 0.25 ]
|
|
|
|
mean value: 0.3375
|
|
|
|
key: train_recall
|
|
value: [0.49367089 0.41772152 0.48101266 0.51898734 0.48101266 0.43037975
|
|
0.50632911 0.43037975 0.5375 0.4875 ]
|
|
|
|
mean value: 0.4784493670886076
|
|
|
|
key: test_roc_auc
|
|
value: [0.53535354 0.65151515 0.57575758 0.62121212 0.46875 0.81770833
|
|
0.73090278 0.74652778 0.50189394 0.59469697]
|
|
|
|
mean value: 0.6244318181818181
|
|
|
|
key: train_roc_auc
|
|
value: [0.71270575 0.66790513 0.70808312 0.72365749 0.70649272 0.67777491
|
|
0.71064755 0.67947559 0.73462031 0.70450085]
|
|
|
|
mean value: 0.7025863422009405
|
|
|
|
key: test_jcc
|
|
value: [0.14285714 0.3 0.2 0.25 0. 0.6
|
|
0.41666667 0.45454545 0.08333333 0.2 ]
|
|
|
|
mean value: 0.2647402597402597
|
|
|
|
key: train_jcc
|
|
value: [0.39393939 0.32038835 0.3877551 0.41 0.38383838 0.33663366
|
|
0.38461538 0.34 0.43 0.37864078]
|
|
|
|
mean value: 0.3765811054013908
|
|
|
|
MCC on Blind test: 0.53
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00983429 0.01149964 0.00986004 0.01124573 0.01079226 0.0097723
|
|
0.00985765 0.01113725 0.00995946 0.01075101]
|
|
|
|
mean value: 0.010470962524414063
|
|
|
|
key: score_time
|
|
value: [0.05714202 0.0184226 0.01779532 0.01783371 0.01803923 0.01747918
|
|
0.01673388 0.01775336 0.01698089 0.01827788]
|
|
|
|
mean value: 0.02164580821990967
|
|
|
|
key: test_mcc
|
|
value: [-0.11677484 0.29904999 0.29904999 0.29904999 0.07726439 0.34258008
|
|
0.07726439 0.2981424 0.32113081 0.32113081]
|
|
|
|
mean value: 0.22178879980661068
|
|
|
|
key: train_mcc
|
|
value: [0.48553706 0.41208648 0.41419284 0.38566308 0.46210103 0.43818223
|
|
0.46195215 0.43833981 0.42126307 0.44605107]
|
|
|
|
mean value: 0.4365368805751352
|
|
|
|
key: test_accuracy
|
|
value: [0.73809524 0.80952381 0.80952381 0.80952381 0.75609756 0.80487805
|
|
0.75609756 0.80487805 0.82926829 0.82926829]
|
|
|
|
mean value: 0.7947154471544715
|
|
|
|
key: train_accuracy
|
|
value: [0.84946237 0.83333333 0.83333333 0.82795699 0.84450402 0.83914209
|
|
0.84450402 0.83914209 0.83378016 0.83914209]
|
|
|
|
mean value: 0.8384300498717173
|
|
|
|
key: test_fscore
|
|
value: [0. 0.2 0.2 0.2 0.16666667 0.42857143
|
|
0.16666667 0.2 0.22222222 0.22222222]
|
|
|
|
mean value: 0.2006349206349206
|
|
|
|
key: train_fscore
|
|
value: [0.49090909 0.41509434 0.43636364 0.38461538 0.46296296 0.45454545
|
|
0.47272727 0.42307692 0.42592593 0.44444444]
|
|
|
|
mean value: 0.4410665435193737
|
|
|
|
key: test_precision
|
|
value: [0. 1. 1. 1. 0.33333333 0.6
|
|
0.33333333 1. 1. 1. ]
|
|
|
|
mean value: 0.7266666666666667
|
|
|
|
key: train_precision
|
|
value: [0.87096774 0.81481481 0.77419355 0.8 0.86206897 0.80645161
|
|
0.83870968 0.88 0.82142857 0.85714286]
|
|
|
|
mean value: 0.8325777789548646
|
|
|
|
key: test_recall
|
|
value: [0. 0.11111111 0.11111111 0.11111111 0.11111111 0.33333333
|
|
0.11111111 0.11111111 0.125 0.125 ]
|
|
|
|
mean value: 0.125
|
|
|
|
key: train_recall
|
|
value: [0.34177215 0.27848101 0.30379747 0.25316456 0.3164557 0.3164557
|
|
0.32911392 0.27848101 0.2875 0.3 ]
|
|
|
|
mean value: 0.30052215189873416
|
|
|
|
key: test_roc_auc
|
|
value: [0.46969697 0.55555556 0.55555556 0.55555556 0.52430556 0.63541667
|
|
0.52430556 0.55555556 0.5625 0.5625 ]
|
|
|
|
mean value: 0.5500946969696969
|
|
|
|
key: train_roc_auc
|
|
value: [0.66406014 0.63070808 0.63995334 0.61804986 0.65142513 0.64802377
|
|
0.65605356 0.63413847 0.63521758 0.64317406]
|
|
|
|
mean value: 0.6420803975346565
|
|
|
|
key: test_jcc
|
|
value: [0. 0.11111111 0.11111111 0.11111111 0.09090909 0.27272727
|
|
0.09090909 0.11111111 0.125 0.125 ]
|
|
|
|
mean value: 0.11489898989898989
|
|
|
|
key: train_jcc
|
|
value: [0.3253012 0.26190476 0.27906977 0.23809524 0.30120482 0.29411765
|
|
0.30952381 0.26829268 0.27058824 0.28571429]
|
|
|
|
mean value: 0.28338124520561114
|
|
|
|
MCC on Blind test: 0.2
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01600242 0.01665425 0.01603627 0.01654625 0.01582694 0.0164063
|
|
0.01608539 0.01628256 0.01777649 0.01702571]
|
|
|
|
mean value: 0.01646425724029541
|
|
|
|
key: score_time
|
|
value: [0.01065445 0.01069617 0.01087236 0.01136184 0.01069951 0.0107069
|
|
0.01067758 0.01079345 0.01084089 0.01085567]
|
|
|
|
mean value: 0.010815882682800293
|
|
|
|
key: test_mcc
|
|
value: [0.225913 0.30577621 0.42358687 0.42817442 0.15345615 0.52265422
|
|
0.22280797 0.30353867 0.32113081 0.66779184]
|
|
|
|
mean value: 0.35748301719076425
|
|
|
|
key: train_mcc
|
|
value: [0.6529723 0.61806561 0.61615709 0.6081474 0.67251663 0.60634012
|
|
0.66480183 0.67316974 0.6098845 0.65819069]
|
|
|
|
mean value: 0.6380245913294944
|
|
|
|
key: test_accuracy
|
|
value: [0.78571429 0.80952381 0.83333333 0.83333333 0.7804878 0.85365854
|
|
0.7804878 0.80487805 0.82926829 0.90243902]
|
|
|
|
mean value: 0.8213124274099884
|
|
|
|
key: train_accuracy
|
|
value: [0.89247312 0.88172043 0.88172043 0.87903226 0.89812332 0.87935657
|
|
0.89544236 0.89812332 0.87935657 0.89276139]
|
|
|
|
mean value: 0.8878109775433135
|
|
|
|
key: test_fscore
|
|
value: [0.30769231 0.33333333 0.46153846 0.36363636 0.18181818 0.57142857
|
|
0.30769231 0.33333333 0.22222222 0.66666667]
|
|
|
|
mean value: 0.3749361749361749
|
|
|
|
key: train_fscore
|
|
value: [0.69230769 0.62068966 0.62711864 0.60869565 0.703125 0.61538462
|
|
0.68292683 0.6984127 0.62809917 0.68253968]
|
|
|
|
mean value: 0.6559299642880824
|
|
|
|
key: test_precision
|
|
value: [0.5 0.66666667 0.75 1. 0.5 0.8
|
|
0.5 0.66666667 1. 1. ]
|
|
|
|
mean value: 0.7383333333333333
|
|
|
|
key: train_precision
|
|
value: [0.88235294 0.97297297 0.94871795 0.97222222 0.91836735 0.94736842
|
|
0.95454545 0.93617021 0.92682927 0.93478261]
|
|
|
|
mean value: 0.9394329397380768
|
|
|
|
key: test_recall
|
|
value: [0.22222222 0.22222222 0.33333333 0.22222222 0.11111111 0.44444444
|
|
0.22222222 0.22222222 0.125 0.5 ]
|
|
|
|
mean value: 0.2625
|
|
|
|
key: train_recall
|
|
value: [0.56962025 0.4556962 0.46835443 0.44303797 0.56962025 0.4556962
|
|
0.53164557 0.55696203 0.475 0.5375 ]
|
|
|
|
mean value: 0.5063132911392405
|
|
|
|
key: test_roc_auc
|
|
value: [0.58080808 0.5959596 0.65151515 0.61111111 0.53993056 0.70659722
|
|
0.57986111 0.59548611 0.5625 0.75 ]
|
|
|
|
mean value: 0.617376893939394
|
|
|
|
key: train_roc_auc
|
|
value: [0.77457122 0.72614162 0.73076425 0.7198125 0.77800741 0.72444674
|
|
0.76242142 0.77337897 0.73238055 0.76363055]
|
|
|
|
mean value: 0.7485555218436794
|
|
|
|
key: test_jcc
|
|
value: [0.18181818 0.2 0.3 0.22222222 0.1 0.4
|
|
0.18181818 0.2 0.125 0.5 ]
|
|
|
|
mean value: 0.2410858585858586
|
|
|
|
key: train_jcc
|
|
value: [0.52941176 0.45 0.45679012 0.4375 0.54216867 0.44444444
|
|
0.51851852 0.53658537 0.45783133 0.51807229]
|
|
|
|
mean value: 0.48913225061359206
|
|
|
|
MCC on Blind test: 0.49
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.7838552 1.77208853 1.77244234 2.05708766 1.60403633 2.0858171
|
|
1.80553317 2.03801513 2.35855365 1.82596135]
|
|
|
|
mean value: 1.9103390455245972
|
|
|
|
key: score_time
|
|
value: [0.02115822 0.01863265 0.01833153 0.01555753 0.01932693 0.01274967
|
|
0.01269507 0.01553988 0.02369905 0.01336861]
|
|
|
|
mean value: 0.017105913162231444
|
|
|
|
key: test_mcc
|
|
value: [0.43434343 0.6617241 0.71717172 0.85634884 0.52265422 0.6593092
|
|
0.44728753 0.60982417 0.75691259 0.53409091]
|
|
|
|
mean value: 0.6199666720136415
|
|
|
|
key: train_mcc
|
|
value: [0.9839288 0.97603535 0.97603535 0.9839288 0.98394041 0.98394041
|
|
0.97605278 0.98394041 0.97626913 0.97626913]
|
|
|
|
mean value: 0.9800340580889745
|
|
|
|
key: test_accuracy
|
|
value: [0.80952381 0.88095238 0.9047619 0.95238095 0.85365854 0.87804878
|
|
0.82926829 0.85365854 0.92682927 0.85365854]
|
|
|
|
mean value: 0.874274099883856
|
|
|
|
key: train_accuracy
|
|
value: [0.99462366 0.99193548 0.99193548 0.99462366 0.99463807 0.99463807
|
|
0.9919571 0.99463807 0.9919571 0.9919571 ]
|
|
|
|
mean value: 0.9932903802358096
|
|
|
|
key: test_fscore
|
|
value: [0.55555556 0.73684211 0.77777778 0.875 0.57142857 0.73684211
|
|
0.53333333 0.7 0.76923077 0.625 ]
|
|
|
|
mean value: 0.6881010217852324
|
|
|
|
key: train_fscore
|
|
value: [0.98734177 0.98113208 0.98113208 0.98734177 0.98734177 0.98734177
|
|
0.98113208 0.98734177 0.98136646 0.98136646]
|
|
|
|
mean value: 0.9842838006429246
|
|
|
|
key: test_precision
|
|
value: [0.55555556 0.7 0.77777778 1. 0.8 0.7
|
|
0.66666667 0.63636364 1. 0.625 ]
|
|
|
|
mean value: 0.7461363636363636
|
|
|
|
key: train_precision
|
|
value: [0.98734177 0.975 0.975 0.98734177 0.98734177 0.98734177
|
|
0.975 0.98734177 0.97530864 0.97530864]
|
|
|
|
mean value: 0.9812326144710111
|
|
|
|
key: test_recall
|
|
value: [0.55555556 0.77777778 0.77777778 0.77777778 0.44444444 0.77777778
|
|
0.44444444 0.77777778 0.625 0.625 ]
|
|
|
|
mean value: 0.6583333333333333
|
|
|
|
key: train_recall
|
|
value: [0.98734177 0.98734177 0.98734177 0.98734177 0.98734177 0.98734177
|
|
0.98734177 0.98734177 0.9875 0.9875 ]
|
|
|
|
mean value: 0.987373417721519
|
|
|
|
key: test_roc_auc
|
|
value: [0.71717172 0.84343434 0.85858586 0.88888889 0.70659722 0.84201389
|
|
0.69097222 0.82638889 0.8125 0.76704545]
|
|
|
|
mean value: 0.7953598484848484
|
|
|
|
key: train_roc_auc
|
|
value: [0.9919644 0.99025792 0.99025792 0.9919644 0.99197021 0.99197021
|
|
0.99026953 0.99197021 0.99033703 0.99033703]
|
|
|
|
mean value: 0.9911298840830668
|
|
|
|
key: test_jcc
|
|
value: [0.38461538 0.58333333 0.63636364 0.77777778 0.4 0.58333333
|
|
0.36363636 0.53846154 0.625 0.45454545]
|
|
|
|
mean value: 0.5347066822066822
|
|
|
|
key: train_jcc
|
|
value: [0.975 0.96296296 0.96296296 0.975 0.975 0.975
|
|
0.96296296 0.975 0.96341463 0.96341463]
|
|
|
|
mean value: 0.9690718157181571
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02745533 0.02765441 0.02012897 0.02177501 0.01911521 0.01608849
|
|
0.01911664 0.01851773 0.02057862 0.02146387]
|
|
|
|
mean value: 0.021189427375793456
|
|
|
|
key: score_time
|
|
value: [0.01580095 0.00977087 0.01018167 0.00925803 0.00895476 0.00922942
|
|
0.00897217 0.00940061 0.00989866 0.00921059]
|
|
|
|
mean value: 0.01006777286529541
|
|
|
|
key: test_mcc
|
|
value: [0.85858586 0.78107061 0.74471985 0.71717172 0.77972283 0.54237994
|
|
0.79652583 0.6310315 0.84091787 0.92155559]
|
|
|
|
mean value: 0.7613681612927914
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.95238095 0.92857143 0.9047619 0.9047619 0.92682927 0.85365854
|
|
0.92682927 0.87804878 0.95121951 0.97560976]
|
|
|
|
mean value: 0.920267131242741
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.82352941 0.8 0.77777778 0.82352941 0.625
|
|
0.84210526 0.70588235 0.85714286 0.93333333]
|
|
|
|
mean value: 0.807718929677134
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.875 0.72727273 0.77777778 0.875 0.71428571
|
|
0.8 0.75 1. 1. ]
|
|
|
|
mean value: 0.8408225108225108
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.77777778 0.88888889 0.77777778 0.77777778 0.55555556
|
|
0.88888889 0.66666667 0.75 0.875 ]
|
|
|
|
mean value: 0.7847222222222222
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92929293 0.87373737 0.8989899 0.85858586 0.87326389 0.74652778
|
|
0.91319444 0.80208333 0.875 0.9375 ]
|
|
|
|
mean value: 0.8708175505050505
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.7 0.66666667 0.63636364 0.7 0.45454545
|
|
0.72727273 0.54545455 0.75 0.875 ]
|
|
|
|
mean value: 0.685530303030303
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.82
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.11748314 0.1138792 0.11257935 0.118783 0.11806083 0.11977959
|
|
0.12217593 0.11842275 0.1105082 0.118788 ]
|
|
|
|
mean value: 0.11704599857330322
|
|
|
|
key: score_time
|
|
value: [0.01931214 0.01800871 0.01792955 0.01963234 0.01855159 0.01916122
|
|
0.01873064 0.01944232 0.01761746 0.02170444]
|
|
|
|
mean value: 0.019009041786193847
|
|
|
|
key: test_mcc
|
|
value: [0.28426762 0.42358687 0.54494926 0.5247362 0.30353867 0.6310315
|
|
0.34258008 0.6140038 0.46037165 0.46037165]
|
|
|
|
mean value: 0.4589437292532702
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.78571429 0.83333333 0.85714286 0.85714286 0.80487805 0.87804878
|
|
0.80487805 0.87804878 0.85365854 0.85365854]
|
|
|
|
mean value: 0.8406504065040651
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.46153846 0.625 0.57142857 0.33333333 0.70588235
|
|
0.42857143 0.66666667 0.5 0.5 ]
|
|
|
|
mean value: 0.5192420814479638
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 0.75 0.71428571 0.8 0.66666667 0.75
|
|
0.6 0.83333333 0.75 0.75 ]
|
|
|
|
mean value: 0.7114285714285714
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.33333333 0.55555556 0.44444444 0.22222222 0.66666667
|
|
0.33333333 0.55555556 0.375 0.375 ]
|
|
|
|
mean value: 0.41944444444444445
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.62121212 0.65151515 0.74747475 0.70707071 0.59548611 0.80208333
|
|
0.63541667 0.76215278 0.67234848 0.67234848]
|
|
|
|
mean value: 0.6867108585858586
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.3 0.45454545 0.4 0.2 0.54545455
|
|
0.27272727 0.5 0.33333333 0.33333333]
|
|
|
|
mean value: 0.35893939393939395
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.82
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0111711 0.01040745 0.01136565 0.01079869 0.01098847 0.01044989
|
|
0.01094103 0.01001263 0.00999689 0.01113796]
|
|
|
|
mean value: 0.01072697639465332
|
|
|
|
key: score_time
|
|
value: [0.00921154 0.01034403 0.00983167 0.00920534 0.00949502 0.00927043
|
|
0.00908327 0.00910687 0.00952053 0.00939202]
|
|
|
|
mean value: 0.009446072578430175
|
|
|
|
key: test_mcc
|
|
value: [-0.04713417 0.43434343 0.18349396 0.18999015 0.00347222 0.4768306
|
|
0.28057127 0.36369648 0.39639387 0.39267774]
|
|
|
|
mean value: 0.26743355609325364
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.61904762 0.80952381 0.69047619 0.73809524 0.65853659 0.80487805
|
|
0.7804878 0.73170732 0.7804878 0.73170732]
|
|
|
|
mean value: 0.7344947735191638
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.2 0.55555556 0.38095238 0.35294118 0.22222222 0.6
|
|
0.4 0.52173913 0.52631579 0.52173913]
|
|
|
|
mean value: 0.42814653855439966
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.18181818 0.55555556 0.33333333 0.375 0.22222222 0.54545455
|
|
0.5 0.42857143 0.45454545 0.4 ]
|
|
|
|
mean value: 0.39965007215007214
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.22222222 0.55555556 0.44444444 0.33333333 0.22222222 0.66666667
|
|
0.33333333 0.66666667 0.625 0.75 ]
|
|
|
|
mean value: 0.48194444444444445
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.47474747 0.71717172 0.6010101 0.59090909 0.50173611 0.75520833
|
|
0.61979167 0.70833333 0.72159091 0.73863636]
|
|
|
|
mean value: 0.64291351010101
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.11111111 0.38461538 0.23529412 0.21428571 0.125 0.42857143
|
|
0.25 0.35294118 0.35714286 0.35294118]
|
|
|
|
mean value: 0.2811902966314731
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.72934055 1.83378863 1.77105832 1.85764003 1.78461933 1.79943681
|
|
1.72764802 1.76972437 1.89383507 1.90175414]
|
|
|
|
mean value: 1.806884527206421
|
|
|
|
key: score_time
|
|
value: [0.10036635 0.10115433 0.10233259 0.09189773 0.10074592 0.13073897
|
|
0.10669971 0.10550141 0.10591745 0.10528898]
|
|
|
|
mean value: 0.10506434440612793
|
|
|
|
key: test_mcc
|
|
value: [0.70391441 0.85858586 0.71717172 0.78107061 0.42139769 0.77972283
|
|
0.71527778 0.6989826 0.66779184 0.84091787]
|
|
|
|
mean value: 0.7184833212496842
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9047619 0.95238095 0.9047619 0.92857143 0.82926829 0.92682927
|
|
0.90243902 0.90243902 0.90243902 0.95121951]
|
|
|
|
mean value: 0.9105110336817654
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.88888889 0.77777778 0.82352941 0.46153846 0.82352941
|
|
0.77777778 0.75 0.66666667 0.85714286]
|
|
|
|
mean value: 0.7541136967607556
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.88888889 0.77777778 0.875 0.75 0.875
|
|
0.77777778 0.85714286 1. 1. ]
|
|
|
|
mean value: 0.8801587301587301
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.55555556 0.88888889 0.77777778 0.77777778 0.33333333 0.77777778
|
|
0.77777778 0.66666667 0.5 0.75 ]
|
|
|
|
mean value: 0.6805555555555556
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.77777778 0.92929293 0.85858586 0.87373737 0.65104167 0.87326389
|
|
0.85763889 0.81770833 0.75 0.875 ]
|
|
|
|
mean value: 0.8264046717171717
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.8 0.63636364 0.7 0.3 0.7
|
|
0.63636364 0.6 0.5 0.75 ]
|
|
|
|
mean value: 0.6178282828282828
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
key: fit_time
|
|
value: [2.1396358 1.28942776 1.19340086 1.24784899 1.68375421 1.17906475
|
|
1.00073361 1.00206161 2.3973825 0.93770719]
|
|
|
|
mean value: 1.4071017265319825
|
|
|
|
key: score_time
|
|
value: [0.15786266 0.15287995 0.15035939 0.17096448 0.15879273 0.12753606
|
|
0.13729715 0.14079833 0.24412775 0.17703223]
|
|
|
|
mean value: 0.16176507472991944
|
|
|
|
key: test_mcc
|
|
value: [0.42817442 0.78107061 0.61591318 0.78173596 0.30353867 0.6989826
|
|
0.52265422 0.52265422 0.45993311 0.57066443]
|
|
|
|
mean value: 0.5685321428161713
|
|
|
|
key: train_mcc
|
|
value: [0.90193168 0.9186072 0.92675267 0.9184817 0.91853696 0.90194648
|
|
0.92687724 0.92680291 0.91111239 0.8946528 ]
|
|
|
|
mean value: 0.9145702019299435
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.92857143 0.88095238 0.92857143 0.80487805 0.90243902
|
|
0.85365854 0.85365854 0.85365854 0.87804878]
|
|
|
|
mean value: 0.8717770034843205
|
|
|
|
key: train_accuracy
|
|
value: [0.96774194 0.97311828 0.97580645 0.97311828 0.97319035 0.96782842
|
|
0.97587131 0.97587131 0.97050938 0.96514745]
|
|
|
|
mean value: 0.9718203176799561
|
|
|
|
key: test_fscore
|
|
value: [0.36363636 0.82352941 0.66666667 0.8 0.33333333 0.75
|
|
0.57142857 0.57142857 0.4 0.54545455]
|
|
|
|
mean value: 0.5825477463712758
|
|
|
|
key: train_fscore
|
|
value: [0.92105263 0.93333333 0.94117647 0.93421053 0.93421053 0.92
|
|
0.94039735 0.94117647 0.92810458 0.91390728]
|
|
|
|
mean value: 0.9307569169645318
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 0.83333333 1. 0.66666667 0.85714286
|
|
0.8 0.8 1. 1. ]
|
|
|
|
mean value: 0.8832142857142857
|
|
|
|
key: train_precision
|
|
value: [0.95890411 0.98591549 0.97297297 0.97260274 0.97260274 0.97183099
|
|
0.98611111 0.97297297 0.97260274 0.97183099]
|
|
|
|
mean value: 0.9738346850612913
|
|
|
|
key: test_recall
|
|
value: [0.22222222 0.77777778 0.55555556 0.66666667 0.22222222 0.66666667
|
|
0.44444444 0.44444444 0.25 0.375 ]
|
|
|
|
mean value: 0.46249999999999997
|
|
|
|
key: train_recall
|
|
value: [0.88607595 0.88607595 0.91139241 0.89873418 0.89873418 0.87341772
|
|
0.89873418 0.91139241 0.8875 0.8625 ]
|
|
|
|
mean value: 0.8914556962025316
|
|
|
|
key: test_roc_auc
|
|
value: [0.61111111 0.87373737 0.76262626 0.83333333 0.59548611 0.81770833
|
|
0.70659722 0.70659722 0.625 0.6875 ]
|
|
|
|
mean value: 0.7219696969696969
|
|
|
|
key: train_roc_auc
|
|
value: [0.93791852 0.94133149 0.95228323 0.94595412 0.94596573 0.9333075
|
|
0.94766641 0.95229484 0.94033703 0.92783703]
|
|
|
|
mean value: 0.9424895903408237
|
|
|
|
key: test_jcc
|
|
value: [0.22222222 0.7 0.5 0.66666667 0.2 0.6
|
|
0.4 0.4 0.25 0.375 ]
|
|
|
|
mean value: 0.4313888888888889
|
|
|
|
key: train_jcc
|
|
value: [0.85365854 0.875 0.88888889 0.87654321 0.87654321 0.85185185
|
|
0.8875 0.88888889 0.86585366 0.84146341]
|
|
|
|
mean value: 0.8706191659138813
|
|
|
|
MCC on Blind test: 0.82
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02546477 0.00988579 0.00996375 0.01107383 0.01002979 0.00996566
|
|
0.0111177 0.01110387 0.01114964 0.01115584]
|
|
|
|
mean value: 0.012091064453125
|
|
|
|
key: score_time
|
|
value: [0.00998831 0.00883961 0.00892806 0.0097239 0.00888944 0.00888157
|
|
0.00988293 0.00979996 0.00983691 0.0098424 ]
|
|
|
|
mean value: 0.009461307525634765
|
|
|
|
key: test_mcc
|
|
value: [ 0.07784989 0.42358687 0.15151515 0.28426762 -0.12009612 0.6989826
|
|
0.48234017 0.54237994 0.00458735 0.25295146]
|
|
|
|
mean value: 0.27983649480059836
|
|
|
|
key: train_mcc
|
|
value: [0.47627568 0.38128402 0.47252142 0.49088992 0.46564258 0.4066905
|
|
0.45377606 0.41364491 0.5140712 0.45094947]
|
|
|
|
mean value: 0.4525745767981807
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.83333333 0.71428571 0.78571429 0.73170732 0.90243902
|
|
0.82926829 0.85365854 0.73170732 0.80487805]
|
|
|
|
mean value: 0.7901277584204414
|
|
|
|
key: train_accuracy
|
|
value: [0.83870968 0.81182796 0.83870968 0.84139785 0.83646113 0.82037534
|
|
0.82841823 0.8230563 0.84718499 0.82841823]
|
|
|
|
mean value: 0.8314559370405604
|
|
|
|
key: test_fscore
|
|
value: [0.25 0.46153846 0.33333333 0.4 0. 0.75
|
|
0.58823529 0.625 0.15384615 0.33333333]
|
|
|
|
mean value: 0.3895286576168929
|
|
|
|
key: train_fscore
|
|
value: [0.56521739 0.48529412 0.55882353 0.58156028 0.55474453 0.5037037
|
|
0.55555556 0.50746269 0.6013986 0.54929577]
|
|
|
|
mean value: 0.5463056169471472
|
|
|
|
key: test_precision
|
|
value: [0.28571429 0.75 0.33333333 0.5 0. 0.85714286
|
|
0.625 0.71428571 0.2 0.5 ]
|
|
|
|
mean value: 0.47654761904761905
|
|
|
|
key: train_precision
|
|
value: [0.66101695 0.57894737 0.66666667 0.66129032 0.65517241 0.60714286
|
|
0.61538462 0.61818182 0.68253968 0.62903226]
|
|
|
|
mean value: 0.6375374951927499
|
|
|
|
key: test_recall
|
|
value: [0.22222222 0.33333333 0.33333333 0.33333333 0. 0.66666667
|
|
0.55555556 0.55555556 0.125 0.25 ]
|
|
|
|
mean value: 0.3375
|
|
|
|
key: train_recall
|
|
value: [0.49367089 0.41772152 0.48101266 0.51898734 0.48101266 0.43037975
|
|
0.50632911 0.43037975 0.5375 0.4875 ]
|
|
|
|
mean value: 0.4784493670886076
|
|
|
|
key: test_roc_auc
|
|
value: [0.53535354 0.65151515 0.57575758 0.62121212 0.46875 0.81770833
|
|
0.73090278 0.74652778 0.50189394 0.59469697]
|
|
|
|
mean value: 0.6244318181818181
|
|
|
|
key: train_roc_auc
|
|
value: [0.71270575 0.66790513 0.70808312 0.72365749 0.70649272 0.67777491
|
|
0.71064755 0.67947559 0.73462031 0.70450085]
|
|
|
|
mean value: 0.7025863422009405
|
|
|
|
key: test_jcc
|
|
value: [0.14285714 0.3 0.2 0.25 0. 0.6
|
|
0.41666667 0.45454545 0.08333333 0.2 ]
|
|
|
|
mean value: 0.2647402597402597
|
|
|
|
key: train_jcc
|
|
value: [0.39393939 0.32038835 0.3877551 0.41 0.38383838 0.33663366
|
|
0.38461538 0.34 0.43 0.37864078]
|
|
|
|
mean value: 0.3765811054013908
|
|
|
|
MCC on Blind test: 0.53
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [1.57921863 1.53817844 4.57131052 5.65595579 4.97904253 5.14335322
|
|
4.43232751 4.83747458 4.0643115 4.83004928]
|
|
|
|
mean value: 4.163122200965882
|
|
|
|
key: score_time
|
|
value: [0.01270485 0.01288128 0.04290867 0.02396774 0.02589917 0.01918221
|
|
0.01912308 0.02631402 0.03656936 0.01719379]
|
|
|
|
mean value: 0.023674416542053222
|
|
|
|
key: test_mcc
|
|
value: [0.93419873 0.93419873 0.79796142 0.87669552 0.85763889 0.77972283
|
|
0.85763889 0.85763889 0.92155559 1. ]
|
|
|
|
mean value: 0.8817249498343762
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97619048 0.97619048 0.92857143 0.95238095 0.95121951 0.92682927
|
|
0.95121951 0.95121951 0.97560976 1. ]
|
|
|
|
mean value: 0.9589430894308943
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.94736842 0.94736842 0.84210526 0.9 0.88888889 0.82352941
|
|
0.88888889 0.88888889 0.93333333 1. ]
|
|
|
|
mean value: 0.9060371517027864
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9 0.9 0.8 0.81818182 0.88888889 0.875
|
|
0.88888889 0.88888889 1. 1. ]
|
|
|
|
mean value: 0.8959848484848485
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.88888889 1. 0.88888889 0.77777778
|
|
0.88888889 0.88888889 0.875 1. ]
|
|
|
|
mean value: 0.9208333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98484848 0.98484848 0.91414141 0.96969697 0.92881944 0.87326389
|
|
0.92881944 0.92881944 0.9375 1. ]
|
|
|
|
mean value: 0.9450757575757576
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9 0.9 0.72727273 0.81818182 0.8 0.7
|
|
0.8 0.8 0.875 1. ]
|
|
|
|
mean value: 0.8320454545454545
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.06765056 0.08285642 0.09254718 0.08614326 0.05079794 0.08059192
|
|
0.08231091 0.09519434 0.09031606 0.09135342]
|
|
|
|
mean value: 0.08197619915008544
|
|
|
|
key: score_time
|
|
value: [0.03701258 0.02388382 0.02445459 0.01250029 0.01238322 0.013134
|
|
0.0275991 0.02205539 0.02407956 0.02290916]
|
|
|
|
mean value: 0.022001171112060548
|
|
|
|
key: test_mcc
|
|
value: [0.54494926 0.70064905 0.57575758 0.87669552 0.71527778 0.6310315
|
|
0.54237994 0.65168169 0.75691259 0.75691259]
|
|
|
|
mean value: 0.6752247502781483
|
|
|
|
key: train_mcc
|
|
value: [0.92049683 0.93642283 0.92049683 0.91204431 0.95238371 0.95238371
|
|
0.9364694 0.93576165 0.92126621 0.91290316]
|
|
|
|
mean value: 0.9300628649101216
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.9047619 0.85714286 0.95238095 0.90243902 0.87804878
|
|
0.85365854 0.85365854 0.92682927 0.92682927]
|
|
|
|
mean value: 0.8912891986062718
|
|
|
|
key: train_accuracy
|
|
value: [0.97311828 0.97849462 0.97311828 0.97043011 0.98391421 0.98391421
|
|
0.97855228 0.97855228 0.97319035 0.97050938]
|
|
|
|
mean value: 0.976379399809738
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.75 0.66666667 0.9 0.77777778 0.70588235
|
|
0.625 0.72727273 0.76923077 0.76923077]
|
|
|
|
mean value: 0.7316061063119887
|
|
|
|
key: train_fscore
|
|
value: [0.9375 0.95 0.9375 0.93081761 0.9625 0.9625
|
|
0.95 0.94936709 0.9382716 0.93167702]
|
|
|
|
mean value: 0.94501333222423
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.85714286 0.66666667 0.81818182 0.77777778 0.75
|
|
0.71428571 0.61538462 1. 1. ]
|
|
|
|
mean value: 0.7913725163725164
|
|
|
|
key: train_precision
|
|
value: [0.92592593 0.9382716 0.92592593 0.925 0.95061728 0.95061728
|
|
0.9382716 0.94936709 0.92682927 0.92592593]
|
|
|
|
mean value: 0.9356751912455833
|
|
|
|
key: test_recall
|
|
value: [0.55555556 0.66666667 0.66666667 1. 0.77777778 0.66666667
|
|
0.55555556 0.88888889 0.625 0.625 ]
|
|
|
|
mean value: 0.7027777777777777
|
|
|
|
key: train_recall
|
|
value: [0.94936709 0.96202532 0.94936709 0.93670886 0.97468354 0.97468354
|
|
0.96202532 0.94936709 0.95 0.9375 ]
|
|
|
|
mean value: 0.9545727848101265
|
|
|
|
key: test_roc_auc
|
|
value: [0.74747475 0.81818182 0.78787879 0.96969697 0.85763889 0.80208333
|
|
0.74652778 0.86631944 0.8125 0.8125 ]
|
|
|
|
mean value: 0.8220801767676768
|
|
|
|
key: train_roc_auc
|
|
value: [0.96444464 0.97248024 0.96444464 0.95811552 0.98053905 0.98053905
|
|
0.97250926 0.96788082 0.96476109 0.95851109]
|
|
|
|
mean value: 0.9684225396967444
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.6 0.5 0.81818182 0.63636364 0.54545455
|
|
0.45454545 0.57142857 0.625 0.625 ]
|
|
|
|
mean value: 0.583051948051948
|
|
|
|
key: train_jcc
|
|
value: [0.88235294 0.9047619 0.88235294 0.87058824 0.92771084 0.92771084
|
|
0.9047619 0.90361446 0.88372093 0.87209302]
|
|
|
|
mean value: 0.8959668025237554
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01700091 0.01041293 0.00945973 0.00938082 0.00980783 0.00951076
|
|
0.0096736 0.00959182 0.00992084 0.00989866]
|
|
|
|
mean value: 0.010465788841247558
|
|
|
|
key: score_time
|
|
value: [0.01056051 0.00879884 0.0088098 0.00861311 0.00867081 0.00858402
|
|
0.00899649 0.00893307 0.00900865 0.00894117]
|
|
|
|
mean value: 0.008991646766662597
|
|
|
|
key: test_mcc
|
|
value: [0.28426762 0.70064905 0.5247362 0.70391441 0.30353867 0.57291667
|
|
0.28057127 0.48234017 0.33432866 0.31852949]
|
|
|
|
mean value: 0.45057922034163905
|
|
|
|
key: train_mcc
|
|
value: [0.57647565 0.48411535 0.54824666 0.52726386 0.5561788 0.56401034
|
|
0.53807155 0.54319113 0.57376465 0.55865576]
|
|
|
|
mean value: 0.546997375710397
|
|
|
|
key: test_accuracy
|
|
value: [0.78571429 0.9047619 0.85714286 0.9047619 0.80487805 0.85365854
|
|
0.7804878 0.82926829 0.82926829 0.80487805]
|
|
|
|
mean value: 0.8354819976771196
|
|
|
|
key: train_accuracy
|
|
value: [0.86827957 0.84408602 0.86021505 0.85483871 0.86327078 0.86595174
|
|
0.85790885 0.86058981 0.86595174 0.86327078]
|
|
|
|
mean value: 0.8604363054570613
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.75 0.57142857 0.71428571 0.33333333 0.66666667
|
|
0.4 0.58823529 0.36363636 0.42857143]
|
|
|
|
mean value: 0.5216157372039725
|
|
|
|
key: train_fscore
|
|
value: [0.64748201 0.56060606 0.62318841 0.60294118 0.62773723 0.63235294
|
|
0.61313869 0.6119403 0.64788732 0.62773723]
|
|
|
|
mean value: 0.6195011359575966
|
|
|
|
key: test_precision
|
|
value: [0.5 0.85714286 0.8 1. 0.66666667 0.66666667
|
|
0.5 0.625 0.66666667 0.5 ]
|
|
|
|
mean value: 0.6782142857142857
|
|
|
|
key: train_precision
|
|
value: [0.75 0.69811321 0.72881356 0.71929825 0.74137931 0.75438596
|
|
0.72413793 0.74545455 0.74193548 0.75438596]
|
|
|
|
mean value: 0.7357904213012624
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.66666667 0.44444444 0.55555556 0.22222222 0.66666667
|
|
0.33333333 0.55555556 0.25 0.375 ]
|
|
|
|
mean value: 0.44027777777777777
|
|
|
|
key: train_recall
|
|
value: [0.56962025 0.46835443 0.5443038 0.51898734 0.5443038 0.5443038
|
|
0.53164557 0.51898734 0.575 0.5375 ]
|
|
|
|
mean value: 0.5353006329113924
|
|
|
|
key: test_roc_auc
|
|
value: [0.62121212 0.81818182 0.70707071 0.77777778 0.59548611 0.78645833
|
|
0.61979167 0.73090278 0.60984848 0.64204545]
|
|
|
|
mean value: 0.6908775252525252
|
|
|
|
key: train_roc_auc
|
|
value: [0.75921286 0.70687346 0.74484814 0.73218992 0.74664169 0.74834237
|
|
0.7386119 0.73568415 0.76019625 0.74485922]
|
|
|
|
mean value: 0.7417459956830186
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.6 0.4 0.55555556 0.2 0.5
|
|
0.25 0.41666667 0.22222222 0.27272727]
|
|
|
|
mean value: 0.3667171717171717
|
|
|
|
key: train_jcc
|
|
value: [0.4787234 0.38947368 0.45263158 0.43157895 0.45744681 0.46236559
|
|
0.44210526 0.44086022 0.47916667 0.45744681]
|
|
|
|
mean value: 0.4491798968079086
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01704407 0.02029037 0.02038693 0.01810551 0.04003406 0.0388875
|
|
0.03933382 0.04400635 0.04520512 0.04177046]
|
|
|
|
mean value: 0.03250641822814941
|
|
|
|
key: score_time
|
|
value: [0.00974464 0.01208925 0.0128572 0.01228189 0.03262424 0.02173567
|
|
0.02031255 0.02697802 0.02192736 0.01659703]
|
|
|
|
mean value: 0.0187147855758667
|
|
|
|
key: test_mcc
|
|
value: [0.61591318 0.42358687 0.6617241 0.531085 0.52265422 0.6989826
|
|
0.6989826 0.60982417 0.66779184 0.66779184]
|
|
|
|
mean value: 0.6098336450543114
|
|
|
|
key: train_mcc
|
|
value: [0.92737397 0.79965711 0.90577461 0.7203473 0.74864658 0.80846528
|
|
0.83432653 0.90882828 0.76678827 0.86966315]
|
|
|
|
mean value: 0.8289871078025104
|
|
|
|
key: test_accuracy
|
|
value: [0.88095238 0.83333333 0.88095238 0.85714286 0.85365854 0.90243902
|
|
0.90243902 0.85365854 0.90243902 0.90243902]
|
|
|
|
mean value: 0.876945412311266
|
|
|
|
key: train_accuracy
|
|
value: [0.97580645 0.93548387 0.96774194 0.91129032 0.91957105 0.9383378
|
|
0.9463807 0.96782842 0.92493298 0.95710456]
|
|
|
|
mean value: 0.9444478076623714
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.46153846 0.73684211 0.5 0.57142857 0.75
|
|
0.75 0.7 0.66666667 0.66666667]
|
|
|
|
mean value: 0.646980913823019
|
|
|
|
key: train_fscore
|
|
value: [0.94267516 0.82857143 0.92592593 0.74418605 0.76923077 0.83687943
|
|
0.86111111 0.92771084 0.8 0.89333333]
|
|
|
|
mean value: 0.8529624049917472
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.75 0.7 1. 0.8 0.85714286
|
|
0.85714286 0.63636364 1. 1. ]
|
|
|
|
mean value: 0.8433982683982684
|
|
|
|
key: train_precision
|
|
value: [0.94871795 0.95081967 0.90361446 0.96 0.98039216 0.9516129
|
|
0.95384615 0.88505747 0.93333333 0.95714286]
|
|
|
|
mean value: 0.9424536954355686
|
|
|
|
key: test_recall
|
|
value: [0.55555556 0.33333333 0.77777778 0.33333333 0.44444444 0.66666667
|
|
0.66666667 0.77777778 0.5 0.5 ]
|
|
|
|
mean value: 0.5555555555555556
|
|
|
|
key: train_recall
|
|
value: [0.93670886 0.73417722 0.94936709 0.60759494 0.63291139 0.74683544
|
|
0.78481013 0.97468354 0.7 0.8375 ]
|
|
|
|
mean value: 0.7904588607594937
|
|
|
|
key: test_roc_auc
|
|
value: [0.76262626 0.65151515 0.84343434 0.66666667 0.70659722 0.81770833
|
|
0.81770833 0.82638889 0.75 0.75 ]
|
|
|
|
mean value: 0.7592645202020202
|
|
|
|
key: train_roc_auc
|
|
value: [0.96152849 0.86196915 0.96103167 0.8003845 0.81475502 0.86831568
|
|
0.88730302 0.97033497 0.84317406 0.91363055]
|
|
|
|
mean value: 0.88824271077723
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.3 0.58333333 0.33333333 0.4 0.6
|
|
0.6 0.53846154 0.5 0.5 ]
|
|
|
|
mean value: 0.4855128205128205
|
|
|
|
key: train_jcc
|
|
value: [0.89156627 0.70731707 0.86206897 0.59259259 0.625 0.7195122
|
|
0.75609756 0.86516854 0.66666667 0.80722892]
|
|
|
|
mean value: 0.7493218774093527
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01887393 0.04717493 0.04252505 0.03427696 0.0314486 0.04632211
|
|
0.04026818 0.03947592 0.04457188 0.04065919]
|
|
|
|
mean value: 0.038559675216674805
|
|
|
|
key: score_time
|
|
value: [0.0130012 0.02130556 0.02607536 0.03163552 0.02629685 0.02147627
|
|
0.0198648 0.02625942 0.020051 0.02602553]
|
|
|
|
mean value: 0.02319915294647217
|
|
|
|
key: test_mcc
|
|
value: [0.48076851 0.57575758 0.54494926 0.70391441 0.57291667 0.57587571
|
|
0.30353867 0.69492321 0.32113081 0.75798945]
|
|
|
|
mean value: 0.553176426951692
|
|
|
|
key: train_mcc
|
|
value: [0.89063353 0.95234883 0.87735655 0.85137594 0.94408056 0.70871454
|
|
0.64739598 0.89729558 0.68729186 0.91651264]
|
|
|
|
mean value: 0.8373005994934117
|
|
|
|
key: test_accuracy
|
|
value: [0.80952381 0.85714286 0.85714286 0.9047619 0.85365854 0.80487805
|
|
0.80487805 0.87804878 0.82926829 0.92682927]
|
|
|
|
mean value: 0.8526132404181185
|
|
|
|
key: train_accuracy
|
|
value: [0.95967742 0.98387097 0.95967742 0.9516129 0.98123324 0.86327078
|
|
0.89008043 0.96514745 0.90080429 0.97050938]
|
|
|
|
mean value: 0.9425884286084927
|
|
|
|
key: test_fscore
|
|
value: [0.6 0.66666667 0.625 0.71428571 0.66666667 0.66666667
|
|
0.33333333 0.76190476 0.22222222 0.8 ]
|
|
|
|
mean value: 0.6056746031746032
|
|
|
|
key: train_fscore
|
|
value: [0.9122807 0.9625 0.90196078 0.87837838 0.95597484 0.75598086
|
|
0.65546218 0.91925466 0.70866142 0.93413174]
|
|
|
|
mean value: 0.8584585565566628
|
|
|
|
key: test_precision
|
|
value: [0.54545455 0.66666667 0.71428571 1. 0.66666667 0.53333333
|
|
0.66666667 0.66666667 1. 0.85714286]
|
|
|
|
mean value: 0.7316883116883117
|
|
|
|
key: train_precision
|
|
value: [0.84782609 0.95061728 0.93243243 0.94202899 0.95 0.60769231
|
|
0.975 0.90243902 0.95744681 0.89655172]
|
|
|
|
mean value: 0.8962034653577938
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 0.55555556 0.55555556 0.66666667 0.88888889
|
|
0.22222222 0.88888889 0.125 0.75 ]
|
|
|
|
mean value: 0.5986111111111111
|
|
|
|
key: train_recall
|
|
value: [0.98734177 0.97468354 0.87341772 0.82278481 0.96202532 1.
|
|
0.49367089 0.93670886 0.5625 0.975 ]
|
|
|
|
mean value: 0.8588132911392405
|
|
|
|
key: test_roc_auc
|
|
value: [0.75757576 0.78787879 0.74747475 0.77777778 0.78645833 0.83506944
|
|
0.59548611 0.88194444 0.5625 0.85984848]
|
|
|
|
mean value: 0.7592013888888889
|
|
|
|
key: train_roc_auc
|
|
value: [0.9697801 0.98051583 0.92817644 0.90456647 0.97420994 0.91326531
|
|
0.74513476 0.95474899 0.77783703 0.97214164]
|
|
|
|
mean value: 0.9120376501898984
|
|
|
|
key: test_jcc
|
|
value: [0.42857143 0.5 0.45454545 0.55555556 0.5 0.5
|
|
0.2 0.61538462 0.125 0.66666667]
|
|
|
|
mean value: 0.45457237207237206
|
|
|
|
key: train_jcc
|
|
value: [0.83870968 0.92771084 0.82142857 0.78313253 0.91566265 0.60769231
|
|
0.4875 0.85057471 0.54878049 0.87640449]
|
|
|
|
mean value: 0.7657596275467198
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.21693301 0.32577515 0.22722745 0.24830532 0.21669579 0.21797705
|
|
0.21818995 0.21728969 0.21963811 0.21825624]
|
|
|
|
mean value: 0.23262877464294435
|
|
|
|
key: score_time
|
|
value: [0.02566838 0.02426243 0.02471066 0.02141333 0.02163672 0.0219543
|
|
0.02171063 0.02121949 0.02128983 0.02129984]
|
|
|
|
mean value: 0.022516560554504395
|
|
|
|
key: test_mcc
|
|
value: [1. 0.93419873 0.79796142 0.87669552 0.77972283 0.77972283
|
|
0.93374247 0.85763889 0.92155559 1. ]
|
|
|
|
mean value: 0.8881238291389542
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.97619048 0.92857143 0.95238095 0.92682927 0.92682927
|
|
0.97560976 0.95121951 0.97560976 1. ]
|
|
|
|
mean value: 0.9613240418118467
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.94736842 0.84210526 0.9 0.82352941 0.82352941
|
|
0.94736842 0.88888889 0.93333333 1. ]
|
|
|
|
mean value: 0.9106123151014792
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.9 0.8 0.81818182 0.875 0.875
|
|
0.9 0.88888889 1. 1. ]
|
|
|
|
mean value: 0.9057070707070707
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.88888889 1. 0.77777778 0.77777778
|
|
1. 0.88888889 0.875 1. ]
|
|
|
|
mean value: 0.9208333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.98484848 0.91414141 0.96969697 0.87326389 0.87326389
|
|
0.984375 0.92881944 0.9375 1. ]
|
|
|
|
mean value: 0.946590909090909
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.9 0.72727273 0.81818182 0.7 0.7
|
|
0.9 0.8 0.875 1. ]
|
|
|
|
mean value: 0.8420454545454545
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.82
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07412887 0.06783414 0.06378627 0.0729394 0.0683949 0.07100201
|
|
0.08421254 0.08958602 0.08135438 0.08704138]
|
|
|
|
mean value: 0.07602798938751221
|
|
|
|
key: score_time
|
|
value: [0.02370763 0.02277517 0.02455759 0.02361369 0.02364612 0.02368784
|
|
0.02302051 0.02426982 0.02486539 0.02800369]
|
|
|
|
mean value: 0.024214744567871094
|
|
|
|
key: test_mcc
|
|
value: [0.78107061 0.85858586 0.79796142 0.93419873 0.85763889 0.85763889
|
|
0.77972283 0.77972283 0.84091787 1. ]
|
|
|
|
mean value: 0.8487457933134953
|
|
|
|
key: train_mcc
|
|
value: [0.98420082 0.96768499 0.99203311 1. 0.98391963 0.98421232
|
|
0.98394041 0.98394041 0.98408703 0.99211062]
|
|
|
|
mean value: 0.9856129343024994
|
|
|
|
key: test_accuracy
|
|
value: [0.92857143 0.95238095 0.92857143 0.97619048 0.95121951 0.95121951
|
|
0.92682927 0.92682927 0.95121951 1. ]
|
|
|
|
mean value: 0.9493031358885018
|
|
|
|
key: train_accuracy
|
|
value: [0.99462366 0.98924731 0.99731183 1. 0.99463807 0.99463807
|
|
0.99463807 0.99463807 0.99463807 0.99731903]
|
|
|
|
mean value: 0.9951692179076941
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.88888889 0.84210526 0.94736842 0.88888889 0.88888889
|
|
0.82352941 0.82352941 0.85714286 1. ]
|
|
|
|
mean value: 0.8783871443314168
|
|
|
|
key: train_fscore
|
|
value: [0.9875 0.97435897 0.99371069 1. 0.98717949 0.9875
|
|
0.98734177 0.98734177 0.9875 0.99378882]
|
|
|
|
mean value: 0.9886221517541935
|
|
|
|
key: test_precision
|
|
value: [0.875 0.88888889 0.8 0.9 0.88888889 0.88888889
|
|
0.875 0.875 1. 1. ]
|
|
|
|
mean value: 0.8991666666666667
|
|
|
|
key: train_precision
|
|
value: [0.97530864 0.98701299 0.9875 1. 1. 0.97530864
|
|
0.98734177 0.98734177 0.9875 0.98765432]
|
|
|
|
mean value: 0.9874968136255056
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.88888889 0.88888889 1. 0.88888889 0.88888889
|
|
0.77777778 0.77777778 0.75 1. ]
|
|
|
|
mean value: 0.8638888888888889
|
|
|
|
key: train_recall
|
|
value: [1. 0.96202532 1. 1. 0.97468354 1.
|
|
0.98734177 0.98734177 0.9875 1. ]
|
|
|
|
mean value: 0.9898892405063291
|
|
|
|
key: test_roc_auc
|
|
value: [0.87373737 0.92929293 0.91414141 0.98484848 0.92881944 0.92881944
|
|
0.87326389 0.87326389 0.875 1. ]
|
|
|
|
mean value: 0.9181186868686868
|
|
|
|
key: train_roc_auc
|
|
value: [0.99658703 0.97930617 0.99829352 1. 0.98734177 0.99659864
|
|
0.99197021 0.99197021 0.99204352 0.99829352]
|
|
|
|
mean value: 0.9932404573593381
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.8 0.72727273 0.9 0.8 0.8
|
|
0.7 0.7 0.75 1. ]
|
|
|
|
mean value: 0.7877272727272727
|
|
|
|
key: train_jcc
|
|
value: [0.97530864 0.95 0.9875 1. 0.97468354 0.97530864
|
|
0.975 0.975 0.97530864 0.98765432]
|
|
|
|
mean value: 0.9775763791217378
|
|
|
|
MCC on Blind test: 0.82
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.17093611 0.23733187 0.23660564 0.1976974 0.24697614 0.21775317
|
|
0.2305522 0.14449883 0.21218038 0.22564006]
|
|
|
|
mean value: 0.21201717853546143
|
|
|
|
key: score_time
|
|
value: [0.03977919 0.03364158 0.03135276 0.03109956 0.04041839 0.03328681
|
|
0.03462434 0.03831649 0.03966093 0.04471827]
|
|
|
|
mean value: 0.03668982982635498
|
|
|
|
key: test_mcc
|
|
value: [-0.11677484 0.29904999 0.02823912 0.29904999 0.22280797 0.30353867
|
|
0.22280797 0.15345615 0. 0.17421709]
|
|
|
|
mean value: 0.15863921022151561
|
|
|
|
key: train_mcc
|
|
value: [0.90262581 0.91083126 0.91083126 0.90262581 0.91088732 0.88621978
|
|
0.90268621 0.89446407 0.90363564 0.90363564]
|
|
|
|
mean value: 0.9028442808292062
|
|
|
|
key: test_accuracy
|
|
value: [0.73809524 0.80952381 0.73809524 0.80952381 0.7804878 0.80487805
|
|
0.7804878 0.7804878 0.80487805 0.80487805]
|
|
|
|
mean value: 0.7851335656213705
|
|
|
|
key: train_accuracy
|
|
value: [0.96774194 0.97043011 0.97043011 0.96774194 0.97050938 0.96246649
|
|
0.96782842 0.96514745 0.96782842 0.96782842]
|
|
|
|
mean value: 0.9677952665109977
|
|
|
|
key: test_fscore
|
|
value: [0. 0.2 0.15384615 0.2 0.30769231 0.33333333
|
|
0.30769231 0.18181818 0. 0.2 ]
|
|
|
|
mean value: 0.1884382284382284
|
|
|
|
key: train_fscore
|
|
value: [0.91780822 0.92517007 0.92517007 0.91780822 0.92517007 0.90277778
|
|
0.91780822 0.91034483 0.91891892 0.91891892]
|
|
|
|
mean value: 0.9179895304817701
|
|
|
|
key: test_precision
|
|
value: [0. 1. 0.25 1. 0.5 0.66666667
|
|
0.5 0.5 0. 0.5 ]
|
|
|
|
mean value: 0.49166666666666664
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0.11111111 0.11111111 0.11111111 0.22222222 0.22222222
|
|
0.22222222 0.11111111 0. 0.125 ]
|
|
|
|
mean value: 0.1236111111111111
|
|
|
|
key: train_recall
|
|
value: [0.84810127 0.86075949 0.86075949 0.84810127 0.86075949 0.82278481
|
|
0.84810127 0.83544304 0.85 0.85 ]
|
|
|
|
mean value: 0.8484810126582278
|
|
|
|
key: test_roc_auc
|
|
value: [0.46969697 0.55555556 0.51010101 0.55555556 0.57986111 0.59548611
|
|
0.57986111 0.53993056 0.5 0.54734848]
|
|
|
|
mean value: 0.5433396464646465
|
|
|
|
key: train_roc_auc
|
|
value: [0.92405063 0.93037975 0.93037975 0.92405063 0.93037975 0.91139241
|
|
0.92405063 0.91772152 0.925 0.925 ]
|
|
|
|
mean value: 0.9242405063291139
|
|
|
|
key: test_jcc
|
|
value: [0. 0.11111111 0.08333333 0.11111111 0.18181818 0.2
|
|
0.18181818 0.1 0. 0.11111111]
|
|
|
|
mean value: 0.10803030303030303
|
|
|
|
key: train_jcc
|
|
value: [0.84810127 0.86075949 0.86075949 0.84810127 0.86075949 0.82278481
|
|
0.84810127 0.83544304 0.85 0.85 ]
|
|
|
|
mean value: 0.8484810126582278
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.9040575 0.74663472 0.82253218 0.82421541 0.84631276 0.84147477
|
|
0.57430267 0.56891108 0.59025025 0.56744289]
|
|
|
|
mean value: 0.7286134243011475
|
|
|
|
key: score_time
|
|
value: [0.01396084 0.01353455 0.01468349 0.01511073 0.01360822 0.00991464
|
|
0.00947881 0.00922632 0.00964713 0.00963688]
|
|
|
|
mean value: 0.011880159378051758
|
|
|
|
key: test_mcc
|
|
value: [0.93419873 0.93419873 0.79796142 0.87669552 0.85763889 0.71527778
|
|
0.6310315 0.77972283 0.92155559 0.92155559]
|
|
|
|
mean value: 0.8369836591412185
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97619048 0.97619048 0.92857143 0.95238095 0.95121951 0.90243902
|
|
0.87804878 0.92682927 0.97560976 0.97560976]
|
|
|
|
mean value: 0.9443089430894309
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.94736842 0.94736842 0.84210526 0.9 0.88888889 0.77777778
|
|
0.70588235 0.82352941 0.93333333 0.93333333]
|
|
|
|
mean value: 0.8699587203302374
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9 0.9 0.8 0.81818182 0.88888889 0.77777778
|
|
0.75 0.875 1. 1. ]
|
|
|
|
mean value: 0.8709848484848485
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.88888889 1. 0.88888889 0.77777778
|
|
0.66666667 0.77777778 0.875 0.875 ]
|
|
|
|
mean value: 0.875
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98484848 0.98484848 0.91414141 0.96969697 0.92881944 0.85763889
|
|
0.80208333 0.87326389 0.9375 0.9375 ]
|
|
|
|
mean value: 0.9190340909090909
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9 0.9 0.72727273 0.81818182 0.8 0.63636364
|
|
0.54545455 0.7 0.875 0.875 ]
|
|
|
|
mean value: 0.7777272727272727
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03040314 0.02820063 0.0325284 0.02842927 0.05756855 0.05813336
|
|
0.02869725 0.0279367 0.03112125 0.03650689]
|
|
|
|
mean value: 0.03595254421234131
|
|
|
|
key: score_time
|
|
value: [0.01271629 0.01393723 0.01270533 0.01889229 0.02039051 0.02033591
|
|
0.01599193 0.01624131 0.03544235 0.01930523]
|
|
|
|
mean value: 0.01859583854675293
|
|
|
|
key: test_mcc
|
|
value: [-0.23354968 0.02823912 -0.16943475 -0.16943475 0.07726439 -0.08385255
|
|
-0.08385255 -0.17437146 0. -0.11149893]
|
|
|
|
mean value: -0.0920491158947519
|
|
|
|
key: train_mcc
|
|
value: [0.24657858 0. 0.17364717 0.17364717 0.14164073 0.14164073
|
|
0.10002041 0.20085236 0.09922414 0.17232512]
|
|
|
|
mean value: 0.14495764120810134
|
|
|
|
key: test_accuracy
|
|
value: [0.61904762 0.73809524 0.69047619 0.69047619 0.75609756 0.75609756
|
|
0.75609756 0.68292683 0.80487805 0.75609756]
|
|
|
|
mean value: 0.7250290360046457
|
|
|
|
key: train_accuracy
|
|
value: [0.80376344 0.78763441 0.79569892 0.79569892 0.79356568 0.79356568
|
|
0.79088472 0.79892761 0.78820375 0.79356568]
|
|
|
|
mean value: 0.7941508835653953
|
|
|
|
key: test_fscore
|
|
value: [0. 0.15384615 0. 0. 0.16666667 0.
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: 0.03205128205128205
|
|
|
|
key: train_fscore
|
|
value: [0.14117647 0. 0.07317073 0.07317073 0.04938272 0.04938272
|
|
0.025 0.09638554 0.02469136 0.07228916]
|
|
|
|
mean value: 0.0604649422921507
|
|
|
|
key: test_precision
|
|
value: [0. 0.25 0. 0. 0.33333333 0.
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: 0.058333333333333334
|
|
|
|
key: train_precision
|
|
value: [1. 0. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 0.9
|
|
|
|
key: test_recall
|
|
value: [0. 0.11111111 0. 0. 0.11111111 0.
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: 0.02222222222222222
|
|
|
|
key: train_recall
|
|
value: [0.07594937 0. 0.03797468 0.03797468 0.02531646 0.02531646
|
|
0.01265823 0.05063291 0.0125 0.0375 ]
|
|
|
|
mean value: 0.03158227848101266
|
|
|
|
key: test_roc_auc
|
|
value: [0.39393939 0.51010101 0.43939394 0.43939394 0.52430556 0.484375
|
|
0.484375 0.4375 0.5 0.46969697]
|
|
|
|
mean value: 0.46830808080808084
|
|
|
|
key: train_roc_auc
|
|
value: [0.53797468 0.5 0.51898734 0.51898734 0.51265823 0.51265823
|
|
0.50632911 0.52531646 0.50625 0.51875 ]
|
|
|
|
mean value: 0.5157911392405063
|
|
|
|
key: test_jcc
|
|
value: [0. 0.08333333 0. 0. 0.09090909 0.
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: 0.017424242424242425
|
|
|
|
key: train_jcc
|
|
value: [0.07594937 0. 0.03797468 0.03797468 0.02531646 0.02531646
|
|
0.01265823 0.05063291 0.0125 0.0375 ]
|
|
|
|
mean value: 0.03158227848101266
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02315354 0.01530075 0.03732443 0.03772879 0.03844643 0.03985286
|
|
0.04924798 0.0379231 0.03757787 0.0376873 ]
|
|
|
|
mean value: 0.035424304008483884
|
|
|
|
key: score_time
|
|
value: [0.01255631 0.01216888 0.02392626 0.02346683 0.02286768 0.02408695
|
|
0.02419138 0.02475119 0.02242708 0.02230811]
|
|
|
|
mean value: 0.02127506732940674
|
|
|
|
key: test_mcc
|
|
value: [0.54494926 0.61591318 0.71717172 0.78107061 0.77972283 0.77972283
|
|
0.6140038 0.52209256 0.75691259 0.84091787]
|
|
|
|
mean value: 0.6952477252887368
|
|
|
|
key: train_mcc
|
|
value: [0.89508125 0.89433543 0.90279179 0.87803336 0.89440973 0.88593688
|
|
0.89440973 0.88593688 0.87860668 0.87860668]
|
|
|
|
mean value: 0.8888148407176151
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.88095238 0.9047619 0.92857143 0.92682927 0.92682927
|
|
0.87804878 0.82926829 0.92682927 0.95121951]
|
|
|
|
mean value: 0.9010452961672474
|
|
|
|
key: train_accuracy
|
|
value: [0.96505376 0.96505376 0.96774194 0.95967742 0.96514745 0.96246649
|
|
0.96514745 0.96246649 0.95978552 0.95978552]
|
|
|
|
mean value: 0.9632325809334371
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.66666667 0.77777778 0.82352941 0.82352941 0.82352941
|
|
0.66666667 0.63157895 0.76923077 0.85714286]
|
|
|
|
mean value: 0.7464651920147276
|
|
|
|
key: train_fscore
|
|
value: [0.91719745 0.91612903 0.92307692 0.90322581 0.91612903 0.90909091
|
|
0.91612903 0.90909091 0.90322581 0.90322581]
|
|
|
|
mean value: 0.9116520709617073
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.83333333 0.77777778 0.875 0.875 0.875
|
|
0.83333333 0.6 1. 1. ]
|
|
|
|
mean value: 0.8383730158730158
|
|
|
|
key: train_precision
|
|
value: [0.92307692 0.93421053 0.93506494 0.92105263 0.93421053 0.93333333
|
|
0.93421053 0.93333333 0.93333333 0.93333333]
|
|
|
|
mean value: 0.9315159402001507
|
|
|
|
key: test_recall
|
|
value: [0.55555556 0.55555556 0.77777778 0.77777778 0.77777778 0.77777778
|
|
0.55555556 0.66666667 0.625 0.75 ]
|
|
|
|
mean value: 0.6819444444444445
|
|
|
|
key: train_recall
|
|
value: [0.91139241 0.89873418 0.91139241 0.88607595 0.89873418 0.88607595
|
|
0.89873418 0.88607595 0.875 0.875 ]
|
|
|
|
mean value: 0.8927215189873418
|
|
|
|
key: test_roc_auc
|
|
value: [0.74747475 0.76262626 0.85858586 0.87373737 0.87326389 0.87326389
|
|
0.76215278 0.77083333 0.8125 0.875 ]
|
|
|
|
mean value: 0.8209438131313131
|
|
|
|
key: train_roc_auc
|
|
value: [0.94545729 0.94083467 0.94716378 0.93279907 0.94086369 0.93453457
|
|
0.94086369 0.93453457 0.92896758 0.92896758]
|
|
|
|
mean value: 0.9374986480962108
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.5 0.63636364 0.7 0.7 0.7
|
|
0.5 0.46153846 0.625 0.75 ]
|
|
|
|
mean value: 0.6027447552447552
|
|
|
|
key: train_jcc
|
|
value: [0.84705882 0.8452381 0.85714286 0.82352941 0.8452381 0.83333333
|
|
0.8452381 0.83333333 0.82352941 0.82352941]
|
|
|
|
mean value: 0.8377170868347339
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.35854936 0.34097719 0.34985304 0.48166299 0.38937902 0.36557841
|
|
0.3499918 0.3412137 0.34975195 0.44435978]
|
|
|
|
mean value: 0.377131724357605
|
|
|
|
key: score_time
|
|
value: [0.02343154 0.02412605 0.02369404 0.0236845 0.02388668 0.02413487
|
|
0.02367187 0.02401352 0.02389383 0.02092838]
|
|
|
|
mean value: 0.023546528816223145
|
|
|
|
key: test_mcc
|
|
value: [0.54494926 0.61591318 0.71717172 0.78107061 0.77972283 0.77972283
|
|
0.6140038 0.56541479 0.75691259 0.84091787]
|
|
|
|
mean value: 0.6995799479936676
|
|
|
|
key: train_mcc
|
|
value: [0.89508125 0.89433543 0.90279179 0.87803336 0.89440973 0.88593688
|
|
0.89440973 0.93576165 0.87860668 0.87860668]
|
|
|
|
mean value: 0.8937973177316136
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.88095238 0.9047619 0.92857143 0.92682927 0.92682927
|
|
0.87804878 0.82926829 0.92682927 0.95121951]
|
|
|
|
mean value: 0.9010452961672474
|
|
|
|
key: train_accuracy
|
|
value: [0.96505376 0.96505376 0.96774194 0.95967742 0.96514745 0.96246649
|
|
0.96514745 0.97855228 0.95978552 0.95978552]
|
|
|
|
mean value: 0.964841160021909
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.66666667 0.77777778 0.82352941 0.82352941 0.82352941
|
|
0.66666667 0.66666667 0.76923077 0.85714286]
|
|
|
|
mean value: 0.7499739639445522
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./embb_sl.py:107: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_sl.py:110: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.91719745 0.91612903 0.92307692 0.90322581 0.91612903 0.90909091
|
|
0.91612903 0.94936709 0.90322581 0.90322581]
|
|
|
|
mean value: 0.9156796889133758
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.83333333 0.77777778 0.875 0.875 0.875
|
|
0.83333333 0.58333333 1. 1. ]
|
|
|
|
mean value: 0.8367063492063492
|
|
|
|
key: train_precision
|
|
value: [0.92307692 0.93421053 0.93506494 0.92105263 0.93421053 0.93333333
|
|
0.93421053 0.94936709 0.93333333 0.93333333]
|
|
|
|
mean value: 0.9331193157275769
|
|
|
|
key: test_recall
|
|
value: [0.55555556 0.55555556 0.77777778 0.77777778 0.77777778 0.77777778
|
|
0.55555556 0.77777778 0.625 0.75 ]
|
|
|
|
mean value: 0.6930555555555555
|
|
|
|
key: train_recall
|
|
value: [0.91139241 0.89873418 0.91139241 0.88607595 0.89873418 0.88607595
|
|
0.89873418 0.94936709 0.875 0.875 ]
|
|
|
|
mean value: 0.8990506329113924
|
|
|
|
key: test_roc_auc
|
|
value: [0.74747475 0.76262626 0.85858586 0.87373737 0.87326389 0.87326389
|
|
0.76215278 0.81076389 0.8125 0.875 ]
|
|
|
|
mean value: 0.8249368686868687
|
|
|
|
key: train_roc_auc
|
|
value: [0.94545729 0.94083467 0.94716378 0.93279907 0.94086369 0.93453457
|
|
0.94086369 0.96788082 0.92896758 0.92896758]
|
|
|
|
mean value: 0.940833273085447
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.5 0.63636364 0.7 0.7 0.7
|
|
0.5 0.5 0.625 0.75 ]
|
|
|
|
mean value: 0.6065909090909091
|
|
|
|
key: train_jcc
|
|
value: [0.84705882 0.8452381 0.85714286 0.82352941 0.8452381 0.83333333
|
|
0.8452381 0.90361446 0.82352941 0.82352941]
|
|
|
|
mean value: 0.8447451992845331
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06172132 0.09037328 0.1193223 0.08857775 0.07014179 0.10274005
|
|
0.0769608 0.04097271 0.03973961 0.0395906 ]
|
|
|
|
mean value: 0.0730140209197998
|
|
|
|
key: score_time
|
|
value: [0.02024794 0.03795671 0.01832414 0.02112007 0.0176785 0.03566527
|
|
0.01544523 0.01500964 0.01500463 0.0150404 ]
|
|
|
|
mean value: 0.021149253845214842
|
|
|
|
key: test_mcc
|
|
value: [0.90950859 0.85839508 0.91144345 0.81534091 0.93844697 0.87689394
|
|
0.81706198 0.90814394 0.87844611 0.90814394]
|
|
|
|
mean value: 0.8821824907024225
|
|
|
|
key: train_mcc
|
|
value: [0.91160266 0.92150707 0.92168472 0.92845534 0.92168472 0.92523832
|
|
0.92858222 0.93526967 0.93187755 0.91856557]
|
|
|
|
mean value: 0.9244467849823496
|
|
|
|
key: test_accuracy
|
|
value: [0.95454545 0.92424242 0.95384615 0.90769231 0.96923077 0.93846154
|
|
0.90769231 0.95384615 0.93846154 0.95384615]
|
|
|
|
mean value: 0.9401864801864802
|
|
|
|
key: train_accuracy
|
|
value: [0.9556314 0.96075085 0.96081772 0.96422487 0.96081772 0.96252129
|
|
0.96422487 0.96763203 0.96592845 0.95911414]
|
|
|
|
mean value: 0.9621663342849335
|
|
|
|
key: test_fscore
|
|
value: [0.95384615 0.92957746 0.95652174 0.90909091 0.96969697 0.93939394
|
|
0.90909091 0.95384615 0.93548387 0.95384615]
|
|
|
|
mean value: 0.9410394263698099
|
|
|
|
key: train_fscore
|
|
value: [0.95622896 0.96068376 0.96095076 0.96422487 0.96095076 0.96283784
|
|
0.96458685 0.96763203 0.96610169 0.95973154]
|
|
|
|
mean value: 0.962392906733548
|
|
|
|
key: test_precision
|
|
value: [0.96875 0.86842105 0.91666667 0.90909091 0.96969697 0.93939394
|
|
0.88235294 0.93939394 0.96666667 0.93939394]
|
|
|
|
mean value: 0.929982702411108
|
|
|
|
key: train_precision
|
|
value: [0.94352159 0.96232877 0.95608108 0.96258503 0.95608108 0.95317726
|
|
0.95652174 0.96928328 0.96283784 0.94701987]
|
|
|
|
mean value: 0.9569437536476978
|
|
|
|
key: test_recall
|
|
value: [0.93939394 1. 1. 0.90909091 0.96969697 0.93939394
|
|
0.9375 0.96875 0.90625 0.96875 ]
|
|
|
|
mean value: 0.9538825757575757
|
|
|
|
key: train_recall
|
|
value: [0.96928328 0.95904437 0.96587031 0.96587031 0.96587031 0.97269625
|
|
0.97278912 0.96598639 0.96938776 0.97278912]
|
|
|
|
mean value: 0.967958719323907
|
|
|
|
key: test_roc_auc
|
|
value: [0.95454545 0.92424242 0.953125 0.90767045 0.96922348 0.93844697
|
|
0.90814394 0.95407197 0.93797348 0.95407197]
|
|
|
|
mean value: 0.9401515151515152
|
|
|
|
key: train_roc_auc
|
|
value: [0.9556314 0.96075085 0.96082631 0.96422767 0.96082631 0.9625386
|
|
0.96421026 0.96763484 0.96592255 0.9590908 ]
|
|
|
|
mean value: 0.962165958533584
|
|
|
|
key: test_jcc
|
|
value: [0.91176471 0.86842105 0.91666667 0.83333333 0.94117647 0.88571429
|
|
0.83333333 0.91176471 0.87878788 0.91176471]
|
|
|
|
mean value: 0.8892727138702371
|
|
|
|
key: train_jcc
|
|
value: [0.91612903 0.92434211 0.9248366 0.93092105 0.9248366 0.92833876
|
|
0.93159609 0.93729373 0.93442623 0.92258065]
|
|
|
|
mean value: 0.9275300850229801
|
|
|
|
MCC on Blind test: 0.85
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.08162332 1.38320208 1.09667015 1.27541614 1.16354775 1.22057319
|
|
0.954983 0.97117877 1.09513283 1.1121552 ]
|
|
|
|
mean value: 1.1354482412338256
|
|
|
|
key: score_time
|
|
value: [0.01517391 0.01828575 0.0150702 0.02177691 0.01645041 0.01536608
|
|
0.01509404 0.01963162 0.01525354 0.01510572]
|
|
|
|
mean value: 0.0167208194732666
|
|
|
|
key: test_mcc
|
|
value: [0.9701425 0.88531564 0.91144345 0.87689394 0.96966868 0.90814394
|
|
0.90805728 0.90814394 0.93844697 0.96966868]
|
|
|
|
mean value: 0.924592502510245
|
|
|
|
key: train_mcc
|
|
value: [0.98294088 0.98976686 0.98978431 0.98296998 0.98296998 0.95229969
|
|
0.98296978 0.98296978 0.98639408 0.98296978]
|
|
|
|
mean value: 0.9816035126690726
|
|
|
|
key: test_accuracy
|
|
value: [0.98484848 0.93939394 0.95384615 0.93846154 0.98461538 0.95384615
|
|
0.95384615 0.95384615 0.96923077 0.98461538]
|
|
|
|
mean value: 0.9616550116550117
|
|
|
|
key: train_accuracy
|
|
value: [0.99146758 0.99488055 0.99488927 0.99148211 0.99148211 0.97614991
|
|
0.99148211 0.99148211 0.99318569 0.99148211]
|
|
|
|
mean value: 0.990798355727916
|
|
|
|
key: test_fscore
|
|
value: [0.98507463 0.94285714 0.95652174 0.93939394 0.98507463 0.95384615
|
|
0.95238095 0.95384615 0.96875 0.98412698]
|
|
|
|
mean value: 0.9621872319313105
|
|
|
|
key: train_fscore
|
|
value: [0.99148211 0.99487179 0.99488927 0.99148211 0.99148211 0.97610922
|
|
0.99151104 0.99151104 0.99322034 0.99151104]
|
|
|
|
mean value: 0.9908070060602878
|
|
|
|
key: test_precision
|
|
value: [0.97058824 0.89189189 0.91666667 0.93939394 0.97058824 0.96875
|
|
0.96774194 0.93939394 0.96875 1. ]
|
|
|
|
mean value: 0.9533764843418544
|
|
|
|
key: train_precision
|
|
value: [0.98979592 0.99657534 0.99319728 0.98979592 0.98979592 0.97610922
|
|
0.98983051 0.98983051 0.98986486 0.98983051]
|
|
|
|
mean value: 0.9894625981785018
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.93939394 1. 0.93939394
|
|
0.9375 0.96875 0.96875 0.96875 ]
|
|
|
|
mean value: 0.9722537878787879
|
|
|
|
key: train_recall
|
|
value: [0.99317406 0.99317406 0.99658703 0.99317406 0.99317406 0.97610922
|
|
0.99319728 0.99319728 0.99659864 0.99319728]
|
|
|
|
mean value: 0.9921582967658052
|
|
|
|
key: test_roc_auc
|
|
value: [0.98484848 0.93939394 0.953125 0.93844697 0.984375 0.95407197
|
|
0.95359848 0.95407197 0.96922348 0.984375 ]
|
|
|
|
mean value: 0.9615530303030303
|
|
|
|
key: train_roc_auc
|
|
value: [0.99146758 0.99488055 0.99489215 0.99148499 0.99148499 0.97614985
|
|
0.99147919 0.99147919 0.99317987 0.99147919]
|
|
|
|
mean value: 0.9907977525481182
|
|
|
|
key: test_jcc
|
|
value: [0.97058824 0.89189189 0.91666667 0.88571429 0.97058824 0.91176471
|
|
0.90909091 0.91176471 0.93939394 0.96875 ]
|
|
|
|
mean value: 0.9276213575110633
|
|
|
|
key: train_jcc
|
|
value: [0.98310811 0.98979592 0.98983051 0.98310811 0.98310811 0.95333333
|
|
0.98316498 0.98316498 0.98653199 0.98316498]
|
|
|
|
mean value: 0.9818311020526517
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01670432 0.01154232 0.0110631 0.01051259 0.01056409 0.01046848
|
|
0.01059175 0.01075506 0.01064587 0.0109458 ]
|
|
|
|
mean value: 0.011379337310791016
|
|
|
|
key: score_time
|
|
value: [0.0129385 0.01055431 0.00917029 0.00899935 0.00896192 0.0092535
|
|
0.00926399 0.00940728 0.00943923 0.00953221]
|
|
|
|
mean value: 0.00975205898284912
|
|
|
|
key: test_mcc
|
|
value: [0.60858062 0.70878358 0.81706198 0.60805838 0.66477003 0.60304138
|
|
0.83005736 0.75545058 0.72649867 0.60621087]
|
|
|
|
mean value: 0.692851345439512
|
|
|
|
key: train_mcc
|
|
value: [0.72013652 0.71010029 0.72068589 0.73786883 0.73157215 0.74496978
|
|
0.72748805 0.72748805 0.7005195 0.7071214 ]
|
|
|
|
mean value: 0.7227950458299315
|
|
|
|
key: test_accuracy
|
|
value: [0.8030303 0.84848485 0.90769231 0.8 0.83076923 0.8
|
|
0.90769231 0.87692308 0.86153846 0.8 ]
|
|
|
|
mean value: 0.8436130536130536
|
|
|
|
key: train_accuracy
|
|
value: [0.86006826 0.85494881 0.86030664 0.86882453 0.86541738 0.87223169
|
|
0.8637138 0.8637138 0.84497445 0.85349233]
|
|
|
|
mean value: 0.8607691681541476
|
|
|
|
key: test_fscore
|
|
value: [0.8115942 0.86111111 0.90625 0.78688525 0.82539683 0.79365079
|
|
0.91428571 0.87878788 0.86567164 0.77966102]
|
|
|
|
mean value: 0.8423294430772711
|
|
|
|
key: train_fscore
|
|
value: [0.86006826 0.85666105 0.86101695 0.87015177 0.86811352 0.86956522
|
|
0.86486486 0.86486486 0.85758998 0.85521886]
|
|
|
|
mean value: 0.8628115333955078
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.79487179 0.93548387 0.85714286 0.86666667 0.83333333
|
|
0.84210526 0.85294118 0.82857143 0.85185185]
|
|
|
|
mean value: 0.8440746020811936
|
|
|
|
key: train_precision
|
|
value: [0.86006826 0.84666667 0.85521886 0.86 0.8496732 0.88652482
|
|
0.8590604 0.8590604 0.7942029 0.84666667]
|
|
|
|
mean value: 0.8517142177167121
|
|
|
|
key: test_recall
|
|
value: [0.84848485 0.93939394 0.87878788 0.72727273 0.78787879 0.75757576
|
|
1. 0.90625 0.90625 0.71875 ]
|
|
|
|
mean value: 0.8470643939393939
|
|
|
|
key: train_recall
|
|
value: [0.86006826 0.8668942 0.8668942 0.88054608 0.88737201 0.85324232
|
|
0.8707483 0.8707483 0.93197279 0.86394558]
|
|
|
|
mean value: 0.8752432030832811
|
|
|
|
key: test_roc_auc
|
|
value: [0.8030303 0.84848485 0.90814394 0.80113636 0.83143939 0.80066288
|
|
0.90909091 0.87736742 0.86221591 0.79876894]
|
|
|
|
mean value: 0.8440340909090909
|
|
|
|
key: train_roc_auc
|
|
value: [0.86006826 0.85494881 0.86031785 0.86884447 0.86545471 0.87219939
|
|
0.86370179 0.86370179 0.84482599 0.8534745 ]
|
|
|
|
mean value: 0.8607537554270855
|
|
|
|
key: test_jcc
|
|
value: [0.68292683 0.75609756 0.82857143 0.64864865 0.7027027 0.65789474
|
|
0.84210526 0.78378378 0.76315789 0.63888889]
|
|
|
|
mean value: 0.7304777737576197
|
|
|
|
key: train_jcc
|
|
value: [0.75449102 0.74926254 0.75595238 0.77014925 0.76696165 0.76923077
|
|
0.76190476 0.76190476 0.75068493 0.74705882]
|
|
|
|
mean value: 0.758760088951491
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01092649 0.01078749 0.01086593 0.01097727 0.01108479 0.01093435
|
|
0.01094604 0.01094174 0.01101041 0.01098323]
|
|
|
|
mean value: 0.010945773124694825
|
|
|
|
key: score_time
|
|
value: [0.0093627 0.00943446 0.00938559 0.00923657 0.00925374 0.0092957
|
|
0.00910807 0.00916314 0.00904298 0.00911641]
|
|
|
|
mean value: 0.009239935874938964
|
|
|
|
key: test_mcc
|
|
value: [0.52388352 0.62994079 0.66477003 0.63068182 0.72348485 0.53838887
|
|
0.54131274 0.69223485 0.60191459 0.63068182]
|
|
|
|
mean value: 0.6177293864483187
|
|
|
|
key: train_mcc
|
|
value: [0.6694139 0.6387612 0.65435396 0.67018758 0.64787328 0.66085884
|
|
0.67328414 0.66469027 0.65721726 0.65981157]
|
|
|
|
mean value: 0.6596452005232071
|
|
|
|
key: test_accuracy
|
|
value: [0.75757576 0.8030303 0.83076923 0.81538462 0.86153846 0.76923077
|
|
0.76923077 0.84615385 0.8 0.81538462]
|
|
|
|
mean value: 0.8068298368298369
|
|
|
|
key: train_accuracy
|
|
value: [0.83447099 0.81911263 0.82623509 0.83475298 0.82282794 0.82964225
|
|
0.83645656 0.83134583 0.82793867 0.82964225]
|
|
|
|
mean value: 0.8292425185038752
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.82666667 0.82539683 0.81818182 0.86153846 0.7761194
|
|
0.7761194 0.84375 0.78688525 0.8125 ]
|
|
|
|
mean value: 0.8104935601433338
|
|
|
|
key: train_fscore
|
|
value: [0.83752094 0.82274247 0.83223684 0.83806344 0.8295082 0.8349835
|
|
0.83946488 0.83797054 0.83360791 0.83333333]
|
|
|
|
mean value: 0.8339432053299032
|
|
|
|
key: test_precision
|
|
value: [0.71794872 0.73809524 0.86666667 0.81818182 0.875 0.76470588
|
|
0.74285714 0.84375 0.82758621 0.8125 ]
|
|
|
|
mean value: 0.8007291672999077
|
|
|
|
key: train_precision
|
|
value: [0.82236842 0.80655738 0.8031746 0.82026144 0.79810726 0.80830671
|
|
0.82565789 0.80757098 0.80830671 0.81699346]
|
|
|
|
mean value: 0.8117304849942879
|
|
|
|
key: test_recall
|
|
value: [0.84848485 0.93939394 0.78787879 0.81818182 0.84848485 0.78787879
|
|
0.8125 0.84375 0.75 0.8125 ]
|
|
|
|
mean value: 0.8249053030303031
|
|
|
|
key: train_recall
|
|
value: [0.85324232 0.83959044 0.86348123 0.85665529 0.86348123 0.86348123
|
|
0.8537415 0.8707483 0.86054422 0.85034014]
|
|
|
|
mean value: 0.8575305890274199
|
|
|
|
key: test_roc_auc
|
|
value: [0.75757576 0.8030303 0.83143939 0.81534091 0.86174242 0.76893939
|
|
0.76988636 0.84611742 0.79924242 0.81534091]
|
|
|
|
mean value: 0.8068655303030303
|
|
|
|
key: train_roc_auc
|
|
value: [0.83447099 0.81911263 0.82629844 0.83479023 0.82289708 0.8296998
|
|
0.83642706 0.83127859 0.82788303 0.82960693]
|
|
|
|
mean value: 0.8292464767476957
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.70454545 0.7027027 0.69230769 0.75675676 0.63414634
|
|
0.63414634 0.72972973 0.64864865 0.68421053]
|
|
|
|
mean value: 0.682355783029724
|
|
|
|
key: train_jcc
|
|
value: [0.7204611 0.69886364 0.71267606 0.72126437 0.70868347 0.71671388
|
|
0.72334294 0.72112676 0.71468927 0.71428571]
|
|
|
|
mean value: 0.7152107189894893
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01015234 0.00999308 0.01089859 0.01132965 0.01142859 0.01188827
|
|
0.01054168 0.01178813 0.01112437 0.01109695]
|
|
|
|
mean value: 0.011024165153503417
|
|
|
|
key: score_time
|
|
value: [0.01811504 0.01828551 0.01806641 0.01871514 0.01840639 0.01976562
|
|
0.01915121 0.01830125 0.01836991 0.01812267]
|
|
|
|
mean value: 0.01852991580963135
|
|
|
|
key: test_mcc
|
|
value: [0.78824078 0.72760688 0.66193182 0.69223485 0.78763191 0.60037879
|
|
0.64271802 0.94017476 0.73234704 0.78822732]
|
|
|
|
mean value: 0.7361492160836447
|
|
|
|
key: train_mcc
|
|
value: [0.79942206 0.82942883 0.8364774 0.8336452 0.8127293 0.85353905
|
|
0.81260956 0.80254528 0.81261173 0.84328817]
|
|
|
|
mean value: 0.8236296598209656
|
|
|
|
key: test_accuracy
|
|
value: [0.89393939 0.86363636 0.83076923 0.84615385 0.89230769 0.8
|
|
0.81538462 0.96923077 0.86153846 0.89230769]
|
|
|
|
mean value: 0.8665268065268066
|
|
|
|
key: train_accuracy
|
|
value: [0.89931741 0.91467577 0.91822828 0.9165247 0.90630324 0.92674617
|
|
0.90630324 0.9011925 0.90630324 0.92163543]
|
|
|
|
mean value: 0.911722997133571
|
|
|
|
key: test_fscore
|
|
value: [0.89230769 0.86567164 0.83076923 0.84848485 0.89855072 0.8
|
|
0.82857143 0.96774194 0.86956522 0.89552239]
|
|
|
|
mean value: 0.8697185107496803
|
|
|
|
key: train_fscore
|
|
value: [0.9015025 0.91525424 0.91836735 0.91792295 0.90693739 0.92699491
|
|
0.90662139 0.9023569 0.90630324 0.9220339 ]
|
|
|
|
mean value: 0.912429476699208
|
|
|
|
key: test_precision
|
|
value: [0.90625 0.85294118 0.84375 0.84848485 0.86111111 0.8125
|
|
0.76315789 1. 0.81081081 0.85714286]
|
|
|
|
mean value: 0.8556148698757058
|
|
|
|
key: train_precision
|
|
value: [0.88235294 0.90909091 0.91525424 0.90131579 0.89932886 0.9222973
|
|
0.90508475 0.89333333 0.90784983 0.91891892]
|
|
|
|
mean value: 0.90548268607534
|
|
|
|
key: test_recall
|
|
value: [0.87878788 0.87878788 0.81818182 0.84848485 0.93939394 0.78787879
|
|
0.90625 0.9375 0.9375 0.9375 ]
|
|
|
|
mean value: 0.8870265151515152
|
|
|
|
key: train_recall
|
|
value: [0.92150171 0.92150171 0.92150171 0.93515358 0.91467577 0.93174061
|
|
0.90816327 0.91156463 0.9047619 0.92517007]
|
|
|
|
mean value: 0.919573494926981
|
|
|
|
key: test_roc_auc
|
|
value: [0.89393939 0.86363636 0.83096591 0.84611742 0.89157197 0.80018939
|
|
0.81676136 0.96875 0.86268939 0.89299242]
|
|
|
|
mean value: 0.8667613636363637
|
|
|
|
key: train_roc_auc
|
|
value: [0.89931741 0.91467577 0.91823385 0.91655638 0.90631748 0.92675466
|
|
0.90630006 0.9011748 0.90630587 0.9216294 ]
|
|
|
|
mean value: 0.9117265677602099
|
|
|
|
key: test_jcc
|
|
value: [0.80555556 0.76315789 0.71052632 0.73684211 0.81578947 0.66666667
|
|
0.70731707 0.9375 0.76923077 0.81081081]
|
|
|
|
mean value: 0.7723396664908219
|
|
|
|
key: train_jcc
|
|
value: [0.82066869 0.84375 0.8490566 0.84829721 0.82972136 0.86392405
|
|
0.82919255 0.82208589 0.82866044 0.85534591]
|
|
|
|
mean value: 0.8390702707508169
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02719641 0.02511859 0.02477622 0.02495646 0.02513075 0.02472091
|
|
0.02505088 0.02563429 0.02554011 0.02485394]
|
|
|
|
mean value: 0.02529785633087158
|
|
|
|
key: score_time
|
|
value: [0.01269245 0.01236415 0.01242304 0.01249981 0.01250362 0.0125668
|
|
0.01256847 0.01278996 0.01274538 0.01266146]
|
|
|
|
mean value: 0.012581515312194824
|
|
|
|
key: test_mcc
|
|
value: [0.81818182 0.79708114 0.91144345 0.75545058 0.84953768 0.78503788
|
|
0.61558566 0.84995597 0.87689394 0.84659091]
|
|
|
|
mean value: 0.8105759022960609
|
|
|
|
key: train_mcc
|
|
value: [0.86446862 0.86730345 0.88088051 0.87752971 0.87455731 0.87153016
|
|
0.88781155 0.8677218 0.86420732 0.85786412]
|
|
|
|
mean value: 0.8713874554555745
|
|
|
|
key: test_accuracy
|
|
value: [0.90909091 0.89393939 0.95384615 0.87692308 0.92307692 0.89230769
|
|
0.8 0.92307692 0.93846154 0.92307692]
|
|
|
|
mean value: 0.9033799533799534
|
|
|
|
key: train_accuracy
|
|
value: [0.93174061 0.9334471 0.94037479 0.93867121 0.93696763 0.93526405
|
|
0.94378194 0.93356048 0.9318569 0.92844974]
|
|
|
|
mean value: 0.935411445947753
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.90140845 0.95652174 0.875 0.92753623 0.89230769
|
|
0.81690141 0.92537313 0.9375 0.92307692]
|
|
|
|
mean value: 0.9064716488973305
|
|
|
|
key: train_fscore
|
|
value: [0.93333333 0.93445378 0.94077834 0.93918919 0.93802345 0.93666667
|
|
0.94453782 0.93489149 0.93311037 0.93023256]
|
|
|
|
mean value: 0.9365216990049874
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.84210526 0.91666667 0.90322581 0.88888889 0.90625
|
|
0.74358974 0.88571429 0.9375 0.90909091]
|
|
|
|
mean value: 0.8842122472650911
|
|
|
|
key: train_precision
|
|
value: [0.91205212 0.9205298 0.93288591 0.92976589 0.92105263 0.91530945
|
|
0.93355482 0.91803279 0.91776316 0.90909091]
|
|
|
|
mean value: 0.9210037459895899
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.96969697 1. 0.84848485 0.96969697 0.87878788
|
|
0.90625 0.96875 0.9375 0.9375 ]
|
|
|
|
mean value: 0.9325757575757576
|
|
|
|
key: train_recall
|
|
value: [0.9556314 0.94880546 0.94880546 0.94880546 0.9556314 0.95904437
|
|
0.95578231 0.95238095 0.94897959 0.95238095]
|
|
|
|
mean value: 0.9526247359011863
|
|
|
|
key: test_roc_auc
|
|
value: [0.90909091 0.89393939 0.953125 0.87736742 0.92234848 0.89251894
|
|
0.80160985 0.92376894 0.93844697 0.92329545]
|
|
|
|
mean value: 0.9035511363636364
|
|
|
|
key: train_roc_auc
|
|
value: [0.93174061 0.9334471 0.94038912 0.93868844 0.93699937 0.9353045
|
|
0.94376146 0.93352836 0.93182768 0.92840891]
|
|
|
|
mean value: 0.9354095563139931
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.82051282 0.91666667 0.77777778 0.86486486 0.80555556
|
|
0.69047619 0.86111111 0.88235294 0.85714286]
|
|
|
|
mean value: 0.8309794118617648
|
|
|
|
key: train_jcc
|
|
value: [0.875 0.87697161 0.88817891 0.88535032 0.88328076 0.88087774
|
|
0.89490446 0.87774295 0.87460815 0.86956522]
|
|
|
|
mean value: 0.8806480114255378
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.88436723 2.62071109 2.547189 2.83125925 5.8814919 4.36691046
|
|
3.61290646 4.35738778 4.10116959 4.1741395 ]
|
|
|
|
mean value: 3.637753224372864
|
|
|
|
key: score_time
|
|
value: [0.01280355 0.01515627 0.02444124 0.03257608 0.0188086 0.01465392
|
|
0.01385593 0.01601386 0.0129416 0.01285648]
|
|
|
|
mean value: 0.017410755157470703
|
|
|
|
key: test_mcc
|
|
value: [0.88040627 0.88531564 0.87844611 0.84995597 0.90805728 0.87689394
|
|
0.84659091 0.87867338 0.81534091 0.96966868]
|
|
|
|
mean value: 0.8789349091200562
|
|
|
|
key: train_mcc
|
|
value: [0.98976686 0.99659284 0.99318567 0.98637134 0.99318567 0.99318567
|
|
0.98978419 0.99318567 0.9965986 0.98978419]
|
|
|
|
mean value: 0.9921640695092957
|
|
|
|
key: test_accuracy
|
|
value: [0.93939394 0.93939394 0.93846154 0.92307692 0.95384615 0.93846154
|
|
0.92307692 0.93846154 0.90769231 0.98461538]
|
|
|
|
mean value: 0.9386480186480187
|
|
|
|
key: train_accuracy
|
|
value: [0.99488055 0.99829352 0.99659284 0.99318569 0.99659284 0.99659284
|
|
0.99488927 0.99659284 0.99829642 0.99488927]
|
|
|
|
mean value: 0.9960806088690687
|
|
|
|
key: test_fscore
|
|
value: [0.9375 0.94285714 0.94117647 0.92063492 0.95522388 0.93939394
|
|
0.92307692 0.93939394 0.90625 0.98412698]
|
|
|
|
mean value: 0.93896342006691
|
|
|
|
key: train_fscore
|
|
value: [0.99488927 0.99829642 0.99658703 0.99317406 0.99658703 0.99658703
|
|
0.99490662 0.99659864 0.99830221 0.99490662]
|
|
|
|
mean value: 0.9960834932903403
|
|
|
|
key: test_precision
|
|
value: [0.96774194 0.89189189 0.91428571 0.96666667 0.94117647 0.93939394
|
|
0.90909091 0.91176471 0.90625 1. ]
|
|
|
|
mean value: 0.9348262233283581
|
|
|
|
key: train_precision
|
|
value: [0.99319728 0.99659864 0.99658703 0.99317406 0.99658703 0.99658703
|
|
0.99322034 0.99659864 0.99661017 0.99322034]
|
|
|
|
mean value: 0.9952380558864374
|
|
|
|
key: test_recall
|
|
value: [0.90909091 1. 0.96969697 0.87878788 0.96969697 0.93939394
|
|
0.9375 0.96875 0.90625 0.96875 ]
|
|
|
|
mean value: 0.9447916666666667
|
|
|
|
key: train_recall
|
|
value: [0.99658703 1. 0.99658703 0.99317406 0.99658703 0.99658703
|
|
0.99659864 0.99659864 1. 0.99659864]
|
|
|
|
mean value: 0.9969318102667688
|
|
|
|
key: test_roc_auc
|
|
value: [0.93939394 0.93939394 0.93797348 0.92376894 0.95359848 0.93844697
|
|
0.92329545 0.93892045 0.90767045 0.984375 ]
|
|
|
|
mean value: 0.9386837121212122
|
|
|
|
key: train_roc_auc
|
|
value: [0.99488055 0.99829352 0.99659284 0.99318567 0.99659284 0.99659284
|
|
0.99488635 0.99659284 0.99829352 0.99488635]
|
|
|
|
mean value: 0.9960797288198555
|
|
|
|
key: test_jcc
|
|
value: [0.88235294 0.89189189 0.88888889 0.85294118 0.91428571 0.88571429
|
|
0.85714286 0.88571429 0.82857143 0.96875 ]
|
|
|
|
mean value: 0.8856253469856411
|
|
|
|
key: train_jcc
|
|
value: [0.98983051 0.99659864 0.99319728 0.98644068 0.99319728 0.99319728
|
|
0.98986486 0.99322034 0.99661017 0.98986486]
|
|
|
|
mean value: 0.992202190083546
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0583663 0.03681612 0.04428267 0.03703952 0.04416752 0.04544234
|
|
0.04772806 0.04291558 0.04592967 0.04700685]
|
|
|
|
mean value: 0.04496946334838867
|
|
|
|
key: score_time
|
|
value: [0.01126266 0.0109055 0.01073933 0.01008487 0.01071358 0.01080036
|
|
0.01077509 0.01093245 0.01079464 0.01084924]
|
|
|
|
mean value: 0.010785770416259766
|
|
|
|
key: test_mcc
|
|
value: [1. 0.88531564 0.90805728 0.81706198 0.96966868 0.94017476
|
|
0.93844697 0.87689394 0.94017476 0.94028478]
|
|
|
|
mean value: 0.9216078784457992
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.93939394 0.95384615 0.90769231 0.98461538 0.96923077
|
|
0.96923077 0.93846154 0.96923077 0.96923077]
|
|
|
|
mean value: 0.9600932400932402
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.94285714 0.95522388 0.90625 0.98507463 0.97058824
|
|
0.96875 0.9375 0.96774194 0.96969697]
|
|
|
|
mean value: 0.9603682790794787
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.89189189 0.94117647 0.93548387 0.97058824 0.94285714
|
|
0.96875 0.9375 1. 0.94117647]
|
|
|
|
mean value: 0.9529424082187364
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.96969697 0.87878788 1. 1.
|
|
0.96875 0.9375 0.9375 1. ]
|
|
|
|
mean value: 0.9692234848484849
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.93939394 0.95359848 0.90814394 0.984375 0.96875
|
|
0.96922348 0.93844697 0.96875 0.96969697]
|
|
|
|
mean value: 0.9600378787878788
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.89189189 0.91428571 0.82857143 0.97058824 0.94285714
|
|
0.93939394 0.88235294 0.9375 0.94117647]
|
|
|
|
mean value: 0.9248617764058941
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.16167355 0.15977597 0.16011739 0.16007113 0.16181588 0.15370274
|
|
0.16055512 0.16027236 0.16169715 0.16143322]
|
|
|
|
mean value: 0.1601114511489868
|
|
|
|
key: score_time
|
|
value: [0.02137733 0.02141595 0.02142334 0.02139664 0.02146602 0.02143741
|
|
0.0216198 0.0215838 0.02148175 0.02147436]
|
|
|
|
mean value: 0.02146763801574707
|
|
|
|
key: test_mcc
|
|
value: [0.88040627 0.85201287 0.87844611 0.82191818 0.84953768 0.79449138
|
|
0.84659091 0.84953768 0.84644588 0.93844697]
|
|
|
|
mean value: 0.8557833923720416
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.93939394 0.92424242 0.93846154 0.90769231 0.92307692 0.89230769
|
|
0.92307692 0.92307692 0.92307692 0.96923077]
|
|
|
|
mean value: 0.9263636363636364
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.9375 0.92753623 0.94117647 0.90322581 0.92753623 0.8852459
|
|
0.92307692 0.91803279 0.92063492 0.96875 ]
|
|
|
|
mean value: 0.9252715273044397
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96774194 0.88888889 0.91428571 0.96551724 0.88888889 0.96428571
|
|
0.90909091 0.96551724 0.93548387 0.96875 ]
|
|
|
|
mean value: 0.9368450404650349
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.96969697 0.96969697 0.84848485 0.96969697 0.81818182
|
|
0.9375 0.875 0.90625 0.96875 ]
|
|
|
|
mean value: 0.9172348484848485
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.93939394 0.92424242 0.93797348 0.90861742 0.92234848 0.89346591
|
|
0.92329545 0.92234848 0.92282197 0.96922348]
|
|
|
|
mean value: 0.9263731060606061
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.88235294 0.86486486 0.88888889 0.82352941 0.86486486 0.79411765
|
|
0.85714286 0.84848485 0.85294118 0.93939394]
|
|
|
|
mean value: 0.8616581440110852
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01344705 0.01317191 0.01322961 0.01330113 0.01336503 0.01348329
|
|
0.01328564 0.01326752 0.01366425 0.01333809]
|
|
|
|
mean value: 0.013355350494384766
|
|
|
|
key: score_time
|
|
value: [0.01062298 0.01056838 0.01057148 0.01060891 0.01068354 0.01066709
|
|
0.01063323 0.01060557 0.01065063 0.01063728]
|
|
|
|
mean value: 0.010624909400939941
|
|
|
|
key: test_mcc
|
|
value: [0.63753558 0.63753558 0.63153153 0.60037879 0.54981488 0.60191459
|
|
0.73110376 0.51053958 0.63482825 0.57061637]
|
|
|
|
mean value: 0.6105798900474947
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.81818182 0.81818182 0.81538462 0.8 0.76923077 0.8
|
|
0.86153846 0.75384615 0.81538462 0.78461538]
|
|
|
|
mean value: 0.8036363636363637
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.82352941 0.82352941 0.8 0.79452055 0.8115942
|
|
0.84745763 0.73333333 0.8 0.78787879]
|
|
|
|
mean value: 0.8045372734468639
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8 0.8 0.8 0.8125 0.725 0.77777778
|
|
0.92592593 0.78571429 0.85714286 0.76470588]
|
|
|
|
mean value: 0.8048766728913788
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.84848485 0.84848485 0.84848485 0.78787879 0.87878788 0.84848485
|
|
0.78125 0.6875 0.75 0.8125 ]
|
|
|
|
mean value: 0.8091856060606061
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.81818182 0.81818182 0.81486742 0.80018939 0.76751894 0.79924242
|
|
0.86032197 0.75284091 0.81439394 0.78503788]
|
|
|
|
mean value: 0.8030776515151515
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.7 0.7 0.66666667 0.65909091 0.68292683
|
|
0.73529412 0.57894737 0.66666667 0.65 ]
|
|
|
|
mean value: 0.6739592557760646
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.59411907 2.52079487 2.54728317 2.55361605 3.08455682 2.76037025
|
|
2.66518021 3.56287289 2.2605269 2.20357037]
|
|
|
|
mean value: 2.6752890586853026
|
|
|
|
key: score_time
|
|
value: [0.11118126 0.11067724 0.11062789 0.11689639 0.1420486 0.10095429
|
|
0.22337675 0.1026926 0.09578085 0.09630704]
|
|
|
|
mean value: 0.12105429172515869
|
|
|
|
key: test_mcc
|
|
value: [0.9701425 0.88531564 0.96966868 0.78822732 0.96966868 0.94028478
|
|
0.96966868 0.87689394 0.94017476 0.96969697]
|
|
|
|
mean value: 0.9279741955034888
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98484848 0.93939394 0.98461538 0.89230769 0.98461538 0.96923077
|
|
0.98461538 0.93846154 0.96923077 0.98461538]
|
|
|
|
mean value: 0.9631934731934733
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98461538 0.94285714 0.98507463 0.88888889 0.98507463 0.96875
|
|
0.98412698 0.9375 0.96774194 0.98461538]
|
|
|
|
mean value: 0.9629244974318999
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.89189189 0.97058824 0.93333333 0.97058824 1.
|
|
1. 0.9375 1. 0.96969697]
|
|
|
|
mean value: 0.967359866551043
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96969697 1. 1. 0.84848485 1. 0.93939394
|
|
0.96875 0.9375 0.9375 1. ]
|
|
|
|
mean value: 0.9601325757575758
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98484848 0.93939394 0.984375 0.89299242 0.984375 0.96969697
|
|
0.984375 0.93844697 0.96875 0.98484848]
|
|
|
|
mean value: 0.9632102272727273
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96969697 0.89189189 0.97058824 0.8 0.97058824 0.93939394
|
|
0.96875 0.88235294 0.9375 0.96969697]
|
|
|
|
mean value: 0.9300459182444477
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.82
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.84621882 1.85790062 1.10967755 1.08481646 1.06378484 1.09557199
|
|
1.08685064 1.09259677 1.09036088 1.1133182 ]
|
|
|
|
mean value: 1.3441096782684325
|
|
|
|
key: score_time
|
|
value: [0.33816075 0.17422271 0.1956923 0.26042914 0.24330926 0.25248599
|
|
0.19835854 0.27451754 0.25211048 0.27057958]
|
|
|
|
mean value: 0.24598662853240966
|
|
|
|
key: test_mcc
|
|
value: [0.9701425 0.88531564 0.96966868 0.81706198 1. 0.94028478
|
|
0.96966868 0.94028478 0.91144345 0.90805728]
|
|
|
|
mean value: 0.9311927780824278
|
|
|
|
key: train_mcc
|
|
value: [0.97616038 0.97952218 0.96938669 0.97620126 0.97283366 0.97283366
|
|
0.96601728 0.96934096 0.96947532 0.96938563]
|
|
|
|
mean value: 0.9721157032553221
|
|
|
|
key: test_accuracy
|
|
value: [0.98484848 0.93939394 0.98461538 0.90769231 1. 0.96923077
|
|
0.98461538 0.96923077 0.95384615 0.95384615]
|
|
|
|
mean value: 0.9647319347319347
|
|
|
|
key: train_accuracy
|
|
value: [0.98805461 0.98976109 0.9846678 0.98807496 0.98637138 0.98637138
|
|
0.98296422 0.9846678 0.9846678 0.9846678 ]
|
|
|
|
mean value: 0.9860268851277102
|
|
|
|
key: test_fscore
|
|
value: [0.98461538 0.94285714 0.98507463 0.90625 1. 0.96875
|
|
0.98412698 0.96969697 0.95081967 0.95238095]
|
|
|
|
mean value: 0.9644571732674253
|
|
|
|
key: train_fscore
|
|
value: [0.98811545 0.98976109 0.98471986 0.98811545 0.98644068 0.98644068
|
|
0.98310811 0.98471986 0.98482293 0.98477157]
|
|
|
|
mean value: 0.986101569221062
|
|
|
|
key: test_precision
|
|
value: [1. 0.89189189 0.97058824 0.93548387 1. 1.
|
|
1. 0.94117647 1. 0.96774194]
|
|
|
|
mean value: 0.9706882404225857
|
|
|
|
key: train_precision
|
|
value: [0.98310811 0.98976109 0.97972973 0.98310811 0.97979798 0.97979798
|
|
0.97651007 0.98305085 0.97658863 0.97979798]
|
|
|
|
mean value: 0.9811250520824318
|
|
|
|
key: test_recall
|
|
value: [0.96969697 1. 1. 0.87878788 1. 0.93939394
|
|
0.96875 1. 0.90625 0.9375 ]
|
|
|
|
mean value: 0.9600378787878788
|
|
|
|
key: train_recall
|
|
value: [0.99317406 0.98976109 0.98976109 0.99317406 0.99317406 0.99317406
|
|
0.98979592 0.98639456 0.99319728 0.98979592]
|
|
|
|
mean value: 0.9911402103503517
|
|
|
|
key: test_roc_auc
|
|
value: [0.98484848 0.93939394 0.984375 0.90814394 1. 0.96969697
|
|
0.984375 0.96969697 0.953125 0.95359848]
|
|
|
|
mean value: 0.9647253787878788
|
|
|
|
key: train_roc_auc
|
|
value: [0.98805461 0.98976109 0.98467646 0.98808363 0.98638295 0.98638295
|
|
0.98295257 0.98466486 0.98465325 0.98465905]
|
|
|
|
mean value: 0.9860271412319194
|
|
|
|
key: test_jcc
|
|
value: [0.96969697 0.89189189 0.97058824 0.82857143 1. 0.93939394
|
|
0.96875 0.94117647 0.90625 0.90909091]
|
|
|
|
mean value: 0.9325409844527491
|
|
|
|
key: train_jcc
|
|
value: [0.97651007 0.97972973 0.96989967 0.97651007 0.97324415 0.97324415
|
|
0.96677741 0.96989967 0.97009967 0.97 ]
|
|
|
|
mean value: 0.9725914565787938
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01303124 0.01310182 0.01105499 0.01080608 0.01093006 0.01085377
|
|
0.01200366 0.0113945 0.01290417 0.01115561]
|
|
|
|
mean value: 0.011723589897155762
|
|
|
|
key: score_time
|
|
value: [0.01087046 0.01089787 0.00959611 0.00923371 0.00975561 0.00923085
|
|
0.01004624 0.0104599 0.00927067 0.00921893]
|
|
|
|
mean value: 0.009858036041259765
|
|
|
|
key: test_mcc
|
|
value: [0.52388352 0.62994079 0.66477003 0.63068182 0.72348485 0.53838887
|
|
0.54131274 0.69223485 0.60191459 0.63068182]
|
|
|
|
mean value: 0.6177293864483187
|
|
|
|
key: train_mcc
|
|
value: [0.6694139 0.6387612 0.65435396 0.67018758 0.64787328 0.66085884
|
|
0.67328414 0.66469027 0.65721726 0.65981157]
|
|
|
|
mean value: 0.6596452005232071
|
|
|
|
key: test_accuracy
|
|
value: [0.75757576 0.8030303 0.83076923 0.81538462 0.86153846 0.76923077
|
|
0.76923077 0.84615385 0.8 0.81538462]
|
|
|
|
mean value: 0.8068298368298369
|
|
|
|
key: train_accuracy
|
|
value: [0.83447099 0.81911263 0.82623509 0.83475298 0.82282794 0.82964225
|
|
0.83645656 0.83134583 0.82793867 0.82964225]
|
|
|
|
mean value: 0.8292425185038752
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.82666667 0.82539683 0.81818182 0.86153846 0.7761194
|
|
0.7761194 0.84375 0.78688525 0.8125 ]
|
|
|
|
mean value: 0.8104935601433338
|
|
|
|
key: train_fscore
|
|
value: [0.83752094 0.82274247 0.83223684 0.83806344 0.8295082 0.8349835
|
|
0.83946488 0.83797054 0.83360791 0.83333333]
|
|
|
|
mean value: 0.8339432053299032
|
|
|
|
key: test_precision
|
|
value: [0.71794872 0.73809524 0.86666667 0.81818182 0.875 0.76470588
|
|
0.74285714 0.84375 0.82758621 0.8125 ]
|
|
|
|
mean value: 0.8007291672999077
|
|
|
|
key: train_precision
|
|
value: [0.82236842 0.80655738 0.8031746 0.82026144 0.79810726 0.80830671
|
|
0.82565789 0.80757098 0.80830671 0.81699346]
|
|
|
|
mean value: 0.8117304849942879
|
|
|
|
key: test_recall
|
|
value: [0.84848485 0.93939394 0.78787879 0.81818182 0.84848485 0.78787879
|
|
0.8125 0.84375 0.75 0.8125 ]
|
|
|
|
mean value: 0.8249053030303031
|
|
|
|
key: train_recall
|
|
value: [0.85324232 0.83959044 0.86348123 0.85665529 0.86348123 0.86348123
|
|
0.8537415 0.8707483 0.86054422 0.85034014]
|
|
|
|
mean value: 0.8575305890274199
|
|
|
|
key: test_roc_auc
|
|
value: [0.75757576 0.8030303 0.83143939 0.81534091 0.86174242 0.76893939
|
|
0.76988636 0.84611742 0.79924242 0.81534091]
|
|
|
|
mean value: 0.8068655303030303
|
|
|
|
key: train_roc_auc
|
|
value: [0.83447099 0.81911263 0.82629844 0.83479023 0.82289708 0.8296998
|
|
0.83642706 0.83127859 0.82788303 0.82960693]
|
|
|
|
mean value: 0.8292464767476957
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.70454545 0.7027027 0.69230769 0.75675676 0.63414634
|
|
0.63414634 0.72972973 0.64864865 0.68421053]
|
|
|
|
mean value: 0.682355783029724
|
|
|
|
key: train_jcc
|
|
value: [0.7204611 0.69886364 0.71267606 0.72126437 0.70868347 0.71671388
|
|
0.72334294 0.72112676 0.71468927 0.71428571]
|
|
|
|
mean value: 0.7152107189894893
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.10618877 0.09051251 0.19730282 0.09614396 0.11167979 0.09417415
|
|
0.09410381 0.12005615 0.11551499 0.11230063]
|
|
|
|
mean value: 0.11379776000976563
|
|
|
|
key: score_time
|
|
value: [0.01352429 0.01269913 0.01121426 0.01182103 0.01217675 0.01148033
|
|
0.01145482 0.01174784 0.01183248 0.01157618]
|
|
|
|
mean value: 0.011952710151672364
|
|
|
|
key: test_mcc
|
|
value: [1. 0.88531564 0.93844697 0.90805728 0.93844697 1.
|
|
0.96966868 0.94028478 0.96966868 1. ]
|
|
|
|
mean value: 0.9549889006803033
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.93939394 0.96923077 0.95384615 0.96923077 1.
|
|
0.98461538 0.96923077 0.98461538 1. ]
|
|
|
|
mean value: 0.9770163170163171
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.94285714 0.96969697 0.95522388 0.96969697 1.
|
|
0.98412698 0.96969697 0.98412698 1. ]
|
|
|
|
mean value: 0.9775425900799035
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.89189189 0.96969697 0.94117647 0.96969697 1.
|
|
1. 0.94117647 1. 1. ]
|
|
|
|
mean value: 0.9713638772462302
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.96969697 0.96969697 0.96969697 1.
|
|
0.96875 1. 0.96875 1. ]
|
|
|
|
mean value: 0.9846590909090909
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.93939394 0.96922348 0.95359848 0.96922348 1.
|
|
0.984375 0.96969697 0.984375 1. ]
|
|
|
|
mean value: 0.9769886363636364
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.89189189 0.94117647 0.91428571 0.94117647 1.
|
|
0.96875 0.94117647 0.96875 1. ]
|
|
|
|
mean value: 0.9567207017942312
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0472343 0.06465888 0.0483613 0.08760691 0.0638423 0.06322742
|
|
0.0496552 0.10060954 0.09633088 0.09051538]
|
|
|
|
mean value: 0.07120420932769775
|
|
|
|
key: score_time
|
|
value: [0.01958227 0.01240706 0.01249146 0.01987028 0.01252627 0.01251197
|
|
0.0195992 0.01977491 0.01979899 0.01302886]
|
|
|
|
mean value: 0.01615912914276123
|
|
|
|
key: test_mcc
|
|
value: [0.94112395 0.85839508 0.90805728 0.94028478 0.93844697 0.94017476
|
|
0.91144345 0.84995597 0.90805728 0.96966868]
|
|
|
|
mean value: 0.9165608191795214
|
|
|
|
key: train_mcc
|
|
value: [0.96250779 0.97952218 0.98301582 0.96601886 0.96595117 0.96934132
|
|
0.95920216 0.96595038 0.96601728 0.95913582]
|
|
|
|
mean value: 0.9676662791940402
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.92424242 0.95384615 0.96923077 0.96923077 0.96923077
|
|
0.95384615 0.92307692 0.95384615 0.98461538]
|
|
|
|
mean value: 0.9570862470862471
|
|
|
|
key: train_accuracy
|
|
value: [0.98122867 0.98976109 0.99148211 0.98296422 0.98296422 0.9846678
|
|
0.97955707 0.98296422 0.98296422 0.97955707]
|
|
|
|
mean value: 0.9838110715095557
|
|
|
|
key: test_fscore
|
|
value: [0.97058824 0.92957746 0.95522388 0.96875 0.96969697 0.97058824
|
|
0.95081967 0.92537313 0.95238095 0.98412698]
|
|
|
|
mean value: 0.9577125528638395
|
|
|
|
key: train_fscore
|
|
value: [0.98132428 0.98976109 0.99151104 0.98305085 0.9829932 0.9846678
|
|
0.97972973 0.98305085 0.98310811 0.97966102]
|
|
|
|
mean value: 0.9838857955608016
|
|
|
|
key: test_precision
|
|
value: [0.94285714 0.86842105 0.94117647 1. 0.96969697 0.94285714
|
|
1. 0.88571429 0.96774194 1. ]
|
|
|
|
mean value: 0.9518464999829226
|
|
|
|
key: train_precision
|
|
value: [0.97635135 0.98976109 0.98648649 0.97643098 0.97966102 0.9829932
|
|
0.97315436 0.97972973 0.97651007 0.97635135]
|
|
|
|
mean value: 0.9797429631258331
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.96969697 0.93939394 0.96969697 1.
|
|
0.90625 0.96875 0.9375 0.96875 ]
|
|
|
|
mean value: 0.9660037878787879
|
|
|
|
key: train_recall
|
|
value: [0.98634812 0.98976109 0.99658703 0.98976109 0.98634812 0.98634812
|
|
0.98639456 0.98639456 0.98979592 0.9829932 ]
|
|
|
|
mean value: 0.9880731814910264
|
|
|
|
key: test_roc_auc
|
|
value: [0.96969697 0.92424242 0.95359848 0.96969697 0.96922348 0.96875
|
|
0.953125 0.92376894 0.95359848 0.984375 ]
|
|
|
|
mean value: 0.9570075757575758
|
|
|
|
key: train_roc_auc
|
|
value: [0.98122867 0.98976109 0.99149079 0.98297578 0.98296998 0.98467066
|
|
0.9795454 0.98295837 0.98295257 0.97955121]
|
|
|
|
mean value: 0.9838104525086485
|
|
|
|
key: test_jcc
|
|
value: [0.94285714 0.86842105 0.91428571 0.93939394 0.94117647 0.94285714
|
|
0.90625 0.86111111 0.90909091 0.96875 ]
|
|
|
|
mean value: 0.9194193482815773
|
|
|
|
key: train_jcc
|
|
value: [0.96333333 0.97972973 0.98316498 0.96666667 0.96655518 0.96979866
|
|
0.9602649 0.96666667 0.96677741 0.96013289]
|
|
|
|
mean value: 0.9683090420891562
|
|
|
|
MCC on Blind test: 0.82
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01097393 0.01111913 0.01158977 0.01039386 0.01048326 0.01046848
|
|
0.01031518 0.01036549 0.01155186 0.0104816 ]
|
|
|
|
mean value: 0.01077425479888916
|
|
|
|
key: score_time
|
|
value: [0.00955105 0.00978112 0.00976467 0.00874472 0.00880885 0.00883722
|
|
0.00887394 0.00886011 0.00992799 0.00892401]
|
|
|
|
mean value: 0.009207367897033691
|
|
|
|
key: test_mcc
|
|
value: [0.58003439 0.64109064 0.78763191 0.63222777 0.69326017 0.57268392
|
|
0.66477003 0.76001241 0.69326017 0.69223485]
|
|
|
|
mean value: 0.6717206249618696
|
|
|
|
key: train_mcc
|
|
value: [0.69673692 0.68326672 0.68347159 0.69045282 0.65968626 0.68734858
|
|
0.67677421 0.69041333 0.67717627 0.69082395]
|
|
|
|
mean value: 0.6836150647547126
|
|
|
|
key: test_accuracy
|
|
value: [0.78787879 0.81818182 0.89230769 0.81538462 0.84615385 0.78461538
|
|
0.83076923 0.87692308 0.84615385 0.84615385]
|
|
|
|
mean value: 0.8344522144522144
|
|
|
|
key: train_accuracy
|
|
value: [0.84812287 0.84129693 0.84156729 0.84497445 0.82964225 0.84327087
|
|
0.83816014 0.84497445 0.83816014 0.84497445]
|
|
|
|
mean value: 0.8415143815664773
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.82857143 0.89855072 0.8125 0.85294118 0.8
|
|
0.8358209 0.88235294 0.83870968 0.84375 ]
|
|
|
|
mean value: 0.8393196843797912
|
|
|
|
key: train_fscore
|
|
value: [0.85092127 0.84474124 0.84369748 0.84757119 0.83221477 0.84666667
|
|
0.84140234 0.84808013 0.84245439 0.84908789]
|
|
|
|
mean value: 0.8446837367804668
|
|
|
|
key: test_precision
|
|
value: [0.75675676 0.78378378 0.86111111 0.83870968 0.82857143 0.75675676
|
|
0.8 0.83333333 0.86666667 0.84375 ]
|
|
|
|
mean value: 0.8169439514399193
|
|
|
|
key: train_precision
|
|
value: [0.83552632 0.82679739 0.83112583 0.83223684 0.81848185 0.82736156
|
|
0.82622951 0.83278689 0.82200647 0.82847896]
|
|
|
|
mean value: 0.8281031613368782
|
|
|
|
key: test_recall
|
|
value: [0.84848485 0.87878788 0.93939394 0.78787879 0.87878788 0.84848485
|
|
0.875 0.9375 0.8125 0.84375 ]
|
|
|
|
mean value: 0.8650568181818182
|
|
|
|
key: train_recall
|
|
value: [0.8668942 0.86348123 0.85665529 0.86348123 0.84641638 0.8668942
|
|
0.85714286 0.86394558 0.86394558 0.8707483 ]
|
|
|
|
mean value: 0.861960483852244
|
|
|
|
key: test_roc_auc
|
|
value: [0.78787879 0.81818182 0.89157197 0.81581439 0.84564394 0.78361742
|
|
0.83143939 0.87784091 0.84564394 0.84611742]
|
|
|
|
mean value: 0.834375
|
|
|
|
key: train_roc_auc
|
|
value: [0.84812287 0.84129693 0.84159295 0.84500592 0.82967078 0.84331104
|
|
0.83812774 0.84494207 0.83811613 0.84493046]
|
|
|
|
mean value: 0.8415116900002322
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.70731707 0.81578947 0.68421053 0.74358974 0.66666667
|
|
0.71794872 0.78947368 0.72222222 0.72972973]
|
|
|
|
mean value: 0.7243614504205005
|
|
|
|
key: train_jcc
|
|
value: [0.74052478 0.73121387 0.72965116 0.73546512 0.71264368 0.73410405
|
|
0.72622478 0.73623188 0.7277937 0.73775216]
|
|
|
|
mean value: 0.7311605183224938
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02084875 0.02504754 0.02607274 0.03220367 0.03078127 0.02826762
|
|
0.03048325 0.02629876 0.02308607 0.02272677]
|
|
|
|
mean value: 0.026581645011901855
|
|
|
|
key: score_time
|
|
value: [0.01154184 0.01212072 0.01222634 0.0122149 0.01249433 0.01250577
|
|
0.01228857 0.01224852 0.01216984 0.01211524]
|
|
|
|
mean value: 0.012192606925964355
|
|
|
|
key: test_mcc
|
|
value: [0.94112395 0.88531564 0.88340557 0.84659091 0.94017476 0.90814394
|
|
0.87867338 0.71318944 0.90814394 0.96966868]
|
|
|
|
mean value: 0.8874430205026376
|
|
|
|
key: train_mcc
|
|
value: [0.9527735 0.98298668 0.91765351 0.96592835 0.97283366 0.96595038
|
|
0.94982099 0.87645168 0.92677947 0.94212842]
|
|
|
|
mean value: 0.9453306644273733
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.93939394 0.93846154 0.92307692 0.96923077 0.95384615
|
|
0.93846154 0.84615385 0.95384615 0.98461538]
|
|
|
|
mean value: 0.9416783216783217
|
|
|
|
key: train_accuracy
|
|
value: [0.97610922 0.99146758 0.95741056 0.98296422 0.98637138 0.98296422
|
|
0.97444634 0.93526405 0.96252129 0.97103918]
|
|
|
|
mean value: 0.9720558052456233
|
|
|
|
key: test_fscore
|
|
value: [0.97058824 0.94285714 0.94285714 0.92307692 0.97058824 0.95384615
|
|
0.93939394 0.82142857 0.95384615 0.98412698]
|
|
|
|
mean value: 0.9402609482021247
|
|
|
|
key: train_fscore
|
|
value: [0.97651007 0.99142367 0.9589491 0.98293515 0.98644068 0.98287671
|
|
0.9750416 0.93140794 0.96369637 0.97094017]
|
|
|
|
mean value: 0.9720221458694838
|
|
|
|
key: test_precision
|
|
value: [0.94285714 0.89189189 0.89189189 0.9375 0.94285714 0.96875
|
|
0.91176471 0.95833333 0.93939394 1. ]
|
|
|
|
mean value: 0.9385240048107695
|
|
|
|
key: train_precision
|
|
value: [0.96039604 0.99655172 0.92405063 0.98293515 0.97979798 0.9862543
|
|
0.95439739 0.99230769 0.93589744 0.97594502]
|
|
|
|
mean value: 0.9688533365091594
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.90909091 1. 0.93939394
|
|
0.96875 0.71875 0.96875 0.96875 ]
|
|
|
|
mean value: 0.9473484848484849
|
|
|
|
key: train_recall
|
|
value: [0.99317406 0.98634812 0.99658703 0.98293515 0.99317406 0.97952218
|
|
0.99659864 0.87755102 0.99319728 0.96598639]
|
|
|
|
mean value: 0.9765073947667804
|
|
|
|
key: test_roc_auc
|
|
value: [0.96969697 0.93939394 0.9375 0.92329545 0.96875 0.95407197
|
|
0.93892045 0.84422348 0.95407197 0.984375 ]
|
|
|
|
mean value: 0.9414299242424242
|
|
|
|
key: train_roc_auc
|
|
value: [0.97610922 0.99146758 0.95747719 0.98296418 0.98638295 0.98295837
|
|
0.97440853 0.93536254 0.96246895 0.9710478 ]
|
|
|
|
mean value: 0.9720647303289917
|
|
|
|
key: test_jcc
|
|
value: [0.94285714 0.89189189 0.89189189 0.85714286 0.94285714 0.91176471
|
|
0.88571429 0.6969697 0.91176471 0.96875 ]
|
|
|
|
mean value: 0.8901604321089616
|
|
|
|
key: train_jcc
|
|
value: [0.95409836 0.9829932 0.92113565 0.96644295 0.97324415 0.96632997
|
|
0.9512987 0.87162162 0.92993631 0.94352159]
|
|
|
|
mean value: 0.946062249446683
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01608205 0.02083969 0.02255607 0.02071667 0.02331257 0.02741241
|
|
0.02450824 0.02299356 0.02310205 0.02305198]
|
|
|
|
mean value: 0.022457528114318847
|
|
|
|
key: score_time
|
|
value: [0.01083064 0.01254272 0.01454592 0.01292634 0.01404285 0.01232314
|
|
0.01454258 0.01225185 0.01214027 0.02303076]
|
|
|
|
mean value: 0.01391770839691162
|
|
|
|
key: test_mcc
|
|
value: [0.94112395 0.85839508 0.77695466 0.84644588 0.63287203 0.87689394
|
|
0.65648795 0.87689394 0.87867338 0.96966868]
|
|
|
|
mean value: 0.8314409482187154
|
|
|
|
key: train_mcc
|
|
value: [0.94242422 0.96928892 0.71494202 0.95598057 0.71292201 0.97955701
|
|
0.75595137 0.96265981 0.94043526 0.96595117]
|
|
|
|
mean value: 0.8900112361644232
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.92424242 0.87692308 0.92307692 0.78461538 0.93846154
|
|
0.8 0.93846154 0.93846154 0.98461538]
|
|
|
|
mean value: 0.9078554778554778
|
|
|
|
key: train_accuracy
|
|
value: [0.97098976 0.98464164 0.83816014 0.97785349 0.83816014 0.98977853
|
|
0.8637138 0.98126065 0.9693356 0.98296422]
|
|
|
|
mean value: 0.9396857975126606
|
|
|
|
key: test_fscore
|
|
value: [0.96875 0.92957746 0.89189189 0.92537313 0.73076923 0.93939394
|
|
0.83116883 0.9375 0.93939394 0.98412698]
|
|
|
|
mean value: 0.9077945415861908
|
|
|
|
key: train_fscore
|
|
value: [0.97053726 0.98461538 0.86049927 0.97807757 0.80730223 0.98976109
|
|
0.88023952 0.98145025 0.97029703 0.98293515]
|
|
|
|
mean value: 0.9405714764352172
|
|
|
|
key: test_precision
|
|
value: [1. 0.86842105 0.80487805 0.91176471 1. 0.93939394
|
|
0.71111111 0.9375 0.91176471 1. ]
|
|
|
|
mean value: 0.9084833563681823
|
|
|
|
key: train_precision
|
|
value: [0.98591549 0.98630137 0.75515464 0.96666667 0.995 0.98976109
|
|
0.78609626 0.97324415 0.94230769 0.98630137]
|
|
|
|
mean value: 0.9366748726825244
|
|
|
|
key: test_recall
|
|
value: [0.93939394 1. 1. 0.93939394 0.57575758 0.93939394
|
|
1. 0.9375 0.96875 0.96875 ]
|
|
|
|
mean value: 0.9268939393939394
|
|
|
|
key: train_recall
|
|
value: [0.9556314 0.98293515 1. 0.98976109 0.67918089 0.98976109
|
|
1. 0.98979592 1. 0.97959184]
|
|
|
|
mean value: 0.956665737967542
|
|
|
|
key: test_roc_auc
|
|
value: [0.96969697 0.92424242 0.875 0.92282197 0.78787879 0.93844697
|
|
0.8030303 0.93844697 0.93892045 0.984375 ]
|
|
|
|
mean value: 0.9082859848484849
|
|
|
|
key: train_roc_auc
|
|
value: [0.97098976 0.98464164 0.83843537 0.97787374 0.83788976 0.98977851
|
|
0.86348123 0.98124608 0.96928328 0.98296998]
|
|
|
|
mean value: 0.9396589352464535
|
|
|
|
key: test_jcc
|
|
value: [0.93939394 0.86842105 0.80487805 0.86111111 0.57575758 0.88571429
|
|
0.71111111 0.88235294 0.88571429 0.96875 ]
|
|
|
|
mean value: 0.8383204351390846
|
|
|
|
key: train_jcc
|
|
value: [0.94276094 0.96969697 0.75515464 0.95709571 0.67687075 0.97972973
|
|
0.78609626 0.96357616 0.94230769 0.96644295]
|
|
|
|
mean value: 0.8939731800185893
|
|
|
|
MCC on Blind test: 0.85
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.26672578 0.26924062 0.26172447 0.25810003 0.25932431 0.26393604
|
|
0.25689721 0.257658 0.26804256 0.26593328]
|
|
|
|
mean value: 0.2627582311630249
|
|
|
|
key: score_time
|
|
value: [0.0161252 0.0157783 0.01563334 0.01572132 0.01619577 0.01593256
|
|
0.0156095 0.01570439 0.01582265 0.01575351]
|
|
|
|
mean value: 0.015827655792236328
|
|
|
|
key: test_mcc
|
|
value: [1. 0.88531564 0.90814394 0.90805728 1. 0.96969697
|
|
0.96966868 0.96969697 0.96966868 1. ]
|
|
|
|
mean value: 0.9580248163606737
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.93939394 0.95384615 0.95384615 1. 0.98461538
|
|
0.98461538 0.98461538 0.98461538 1. ]
|
|
|
|
mean value: 0.9785547785547786
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.94285714 0.95384615 0.95522388 1. 0.98461538
|
|
0.98412698 0.98461538 0.98412698 1. ]
|
|
|
|
mean value: 0.9789411914785049
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.89189189 0.96875 0.94117647 1. 1.
|
|
1. 0.96969697 1. 1. ]
|
|
|
|
mean value: 0.9771515332177096
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.93939394 0.96969697 1. 0.96969697
|
|
0.96875 1. 0.96875 1. ]
|
|
|
|
mean value: 0.9816287878787879
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.93939394 0.95407197 0.95359848 1. 0.98484848
|
|
0.984375 0.98484848 0.984375 1. ]
|
|
|
|
mean value: 0.9785511363636363
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.89189189 0.91176471 0.91428571 1. 0.96969697
|
|
0.96875 0.96969697 0.96875 1. ]
|
|
|
|
mean value: 0.9594836251453899
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.11414218 0.11675215 0.06662536 0.08681798 0.09918189 0.10906219
|
|
0.08984184 0.11081529 0.09279585 0.09611773]
|
|
|
|
mean value: 0.09821524620056152
|
|
|
|
key: score_time
|
|
value: [0.03923726 0.03169036 0.019665 0.02797008 0.03503919 0.03238821
|
|
0.02360916 0.03317976 0.03165078 0.01983523]
|
|
|
|
mean value: 0.02942650318145752
|
|
|
|
key: test_mcc
|
|
value: [1. 0.88531564 0.93844697 0.81706198 0.93844697 1.
|
|
0.93844697 0.90814394 0.96966868 0.96969697]
|
|
|
|
mean value: 0.9365228119413505
|
|
|
|
key: train_mcc
|
|
value: [0.99659284 0.9931972 0.9863944 0.99659864 0.9965986 0.99318567
|
|
0.99318567 0.99318567 0.98639408 0.98637134]
|
|
|
|
mean value: 0.9921704109887847
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.93939394 0.96923077 0.90769231 0.96923077 1.
|
|
0.96923077 0.95384615 0.98461538 0.98461538]
|
|
|
|
mean value: 0.9677855477855478
|
|
|
|
key: train_accuracy
|
|
value: [0.99829352 0.99658703 0.99318569 0.99829642 0.99829642 0.99659284
|
|
0.99659284 0.99659284 0.99318569 0.99318569]
|
|
|
|
mean value: 0.9960808995819549
|
|
|
|
key: test_fscore
|
|
value: [1. 0.94285714 0.96969697 0.90625 0.96969697 1.
|
|
0.96875 0.95384615 0.98412698 0.98461538]
|
|
|
|
mean value: 0.9679839604839605
|
|
|
|
key: train_fscore
|
|
value: [0.9982906 0.99659864 0.99319728 0.99829642 0.9982906 0.99658703
|
|
0.99659864 0.99659864 0.99322034 0.99319728]
|
|
|
|
mean value: 0.9960875464958671
|
|
|
|
key: test_precision
|
|
value: [1. 0.89189189 0.96969697 0.93548387 0.96969697 1.
|
|
0.96875 0.93939394 1. 0.96969697]
|
|
|
|
mean value: 0.9644610611344482
|
|
|
|
key: train_precision
|
|
value: [1. 0.99322034 0.98983051 0.99659864 1. 0.99658703
|
|
0.99659864 0.99659864 0.98986486 0.99319728]
|
|
|
|
mean value: 0.9952495940318127
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.96969697 0.87878788 0.96969697 1.
|
|
0.96875 0.96875 0.96875 1. ]
|
|
|
|
mean value: 0.9724431818181818
|
|
|
|
key: train_recall
|
|
value: [0.99658703 1. 0.99658703 1. 0.99658703 0.99658703
|
|
0.99659864 0.99659864 0.99659864 0.99319728]
|
|
|
|
mean value: 0.9969341320145806
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.93939394 0.96922348 0.90814394 0.96922348 1.
|
|
0.96922348 0.95407197 0.984375 0.98484848]
|
|
|
|
mean value: 0.9678503787878788
|
|
|
|
key: train_roc_auc
|
|
value: [0.99829352 0.99658703 0.99319147 0.99829932 0.99829352 0.99659284
|
|
0.99659284 0.99659284 0.99317987 0.99318567]
|
|
|
|
mean value: 0.9960808896937614
|
|
|
|
key: test_jcc
|
|
value: [1. 0.89189189 0.94117647 0.82857143 0.94117647 1.
|
|
0.93939394 0.91176471 0.96875 0.96969697]
|
|
|
|
mean value: 0.9392421876613053
|
|
|
|
key: train_jcc
|
|
value: [0.99658703 0.99322034 0.98648649 0.99659864 0.99658703 0.99319728
|
|
0.99322034 0.99322034 0.98653199 0.98648649]
|
|
|
|
mean value: 0.9922135956254906
|
|
|
|
MCC on Blind test: 0.82
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.19741392 0.30958152 0.31759167 0.32844281 0.26167846 0.33871961
|
|
0.36064386 0.29585671 0.40997648 0.32770634]
|
|
|
|
mean value: 0.3147611379623413
|
|
|
|
key: score_time
|
|
value: [0.01713848 0.02869463 0.02886558 0.02881336 0.02947807 0.02899432
|
|
0.02927995 0.05242538 0.04720736 0.02807188]
|
|
|
|
mean value: 0.03189690113067627
|
|
|
|
key: test_mcc
|
|
value: [0.75757576 0.81818182 0.63068182 0.76761091 0.78763191 0.63222777
|
|
0.62588014 0.87844611 0.78822732 0.81706198]
|
|
|
|
mean value: 0.7503525533343659
|
|
|
|
key: train_mcc
|
|
value: [0.9761149 0.97271891 0.96595038 0.96934096 0.97957952 0.97957999
|
|
0.96934096 0.97274268 0.96934132 0.96595117]
|
|
|
|
mean value: 0.9720660801372744
|
|
|
|
key: test_accuracy
|
|
value: [0.87878788 0.90909091 0.81538462 0.87692308 0.89230769 0.81538462
|
|
0.8 0.93846154 0.89230769 0.90769231]
|
|
|
|
mean value: 0.8726340326340326
|
|
|
|
key: train_accuracy
|
|
value: [0.98805461 0.98634812 0.98296422 0.9846678 0.98977853 0.98977853
|
|
0.9846678 0.98637138 0.9846678 0.98296422]
|
|
|
|
mean value: 0.9860263037019379
|
|
|
|
key: test_fscore
|
|
value: [0.87878788 0.90909091 0.81818182 0.86666667 0.89855072 0.8125
|
|
0.82191781 0.93548387 0.89552239 0.90909091]
|
|
|
|
mean value: 0.8745792973702484
|
|
|
|
key: train_fscore
|
|
value: [0.98803419 0.98630137 0.98287671 0.98461538 0.98972603 0.98979592
|
|
0.98471986 0.98639456 0.9846678 0.98293515]
|
|
|
|
mean value: 0.9860066978574287
|
|
|
|
key: test_precision
|
|
value: [0.87878788 0.90909091 0.81818182 0.96296296 0.86111111 0.83870968
|
|
0.73170732 0.96666667 0.85714286 0.88235294]
|
|
|
|
mean value: 0.87067141396132
|
|
|
|
key: train_precision
|
|
value: [0.98972603 0.98969072 0.9862543 0.98630137 0.99312715 0.98644068
|
|
0.98305085 0.98639456 0.98634812 0.98630137]
|
|
|
|
mean value: 0.9873635138185494
|
|
|
|
key: test_recall
|
|
value: [0.87878788 0.90909091 0.81818182 0.78787879 0.93939394 0.78787879
|
|
0.9375 0.90625 0.9375 0.9375 ]
|
|
|
|
mean value: 0.8839962121212122
|
|
|
|
key: train_recall
|
|
value: [0.98634812 0.98293515 0.97952218 0.98293515 0.98634812 0.99317406
|
|
0.98639456 0.98639456 0.9829932 0.97959184]
|
|
|
|
mean value: 0.9846636948294676
|
|
|
|
key: test_roc_auc
|
|
value: [0.87878788 0.90909091 0.81534091 0.87831439 0.89157197 0.81581439
|
|
0.80208333 0.93797348 0.89299242 0.90814394]
|
|
|
|
mean value: 0.8730113636363637
|
|
|
|
key: train_roc_auc
|
|
value: [0.98805461 0.98634812 0.98295837 0.98466486 0.9897727 0.98978431
|
|
0.98466486 0.98637134 0.98467066 0.98296998]
|
|
|
|
mean value: 0.9860259803580135
|
|
|
|
key: test_jcc
|
|
value: [0.78378378 0.83333333 0.69230769 0.76470588 0.81578947 0.68421053
|
|
0.69767442 0.87878788 0.81081081 0.83333333]
|
|
|
|
mean value: 0.7794737133314424
|
|
|
|
key: train_jcc
|
|
value: [0.97635135 0.97297297 0.96632997 0.96969697 0.97966102 0.97979798
|
|
0.96989967 0.97315436 0.96979866 0.96644295]
|
|
|
|
mean value: 0.9724105895804595
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.08177519 1.07353163 1.07433748 1.08860278 1.09842777 1.1075387
|
|
1.09151793 1.08468246 1.08263707 1.08584714]
|
|
|
|
mean value: 1.0868898153305053
|
|
|
|
key: score_time
|
|
value: [0.00959563 0.00942159 0.00925279 0.00983596 0.00952816 0.01010299
|
|
0.01016855 0.00941157 0.00962877 0.01072001]
|
|
|
|
mean value: 0.009766602516174316
|
|
|
|
key: test_mcc
|
|
value: [1. 0.88531564 0.93844697 0.90805728 0.93844697 1.
|
|
0.96966868 0.90814394 0.96966868 0.96969697]
|
|
|
|
mean value: 0.9487445133303707
|
|
|
|
key: train_mcc
|
|
value: [0.99659284 1. 1. 1. 0.9965986 0.9965986
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9989790035143291
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.93939394 0.96923077 0.95384615 0.96923077 1.
|
|
0.98461538 0.95384615 0.98461538 0.98461538]
|
|
|
|
mean value: 0.973939393939394
|
|
|
|
key: train_accuracy
|
|
value: [0.99829352 1. 1. 1. 0.99829642 0.99829642
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9994886360332809
|
|
|
|
key: test_fscore
|
|
value: [1. 0.94285714 0.96969697 0.95522388 0.96969697 1.
|
|
0.98412698 0.95384615 0.98412698 0.98461538]
|
|
|
|
mean value: 0.9744190469563604
|
|
|
|
key: train_fscore
|
|
value: [0.9982906 1. 1. 1. 0.9982906 0.9982906 1.
|
|
1. 1. 1. ]
|
|
|
|
mean value: 0.9994871794871795
|
|
|
|
key: test_precision
|
|
value: [1. 0.89189189 0.96969697 0.94117647 0.96969697 1.
|
|
1. 0.93939394 1. 0.96969697]
|
|
|
|
mean value: 0.9681553210964976
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.96969697 0.96969697 0.96969697 1.
|
|
0.96875 0.96875 0.96875 1. ]
|
|
|
|
mean value: 0.9815340909090909
|
|
|
|
key: train_recall
|
|
value: [0.99658703 1. 1. 1. 0.99658703 0.99658703
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.998976109215017
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.93939394 0.96922348 0.95359848 0.96922348 1.
|
|
0.984375 0.95407197 0.984375 0.98484848]
|
|
|
|
mean value: 0.9739109848484848
|
|
|
|
key: train_roc_auc
|
|
value: [0.99829352 1. 1. 1. 0.99829352 0.99829352
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9994880546075086
|
|
|
|
key: test_jcc
|
|
value: [1. 0.89189189 0.94117647 0.91428571 0.94117647 1.
|
|
0.96875 0.91176471 0.96875 0.96969697]
|
|
|
|
mean value: 0.9507492222933399
|
|
|
|
key: train_jcc
|
|
value: [0.99658703 1. 1. 1. 0.99658703 0.99658703
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.998976109215017
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03513765 0.03600693 0.06509256 0.03364587 0.04783249 0.04859257
|
|
0.04702735 0.05344486 0.06726575 0.03752756]
|
|
|
|
mean value: 0.04715735912322998
|
|
|
|
key: score_time
|
|
value: [0.01282334 0.01310682 0.01312494 0.01919127 0.01749039 0.01724458
|
|
0.02123213 0.01419568 0.01420665 0.01502204]
|
|
|
|
mean value: 0.01576378345489502
|
|
|
|
key: test_mcc
|
|
value: [0.88040627 1. 0.93844697 0.90805728 0.96966868 0.94017476
|
|
0.84659091 0.90814394 0.87844611 0.96966868]
|
|
|
|
mean value: 0.9239603602279465
|
|
|
|
key: train_mcc
|
|
value: [0.99659284 0.9830783 0.97976246 0.98646327 0.98646327 0.98983039
|
|
0.98978419 0.98646265 0.98301523 0.98310636]
|
|
|
|
mean value: 0.9864558957440082
|
|
|
|
key: test_accuracy
|
|
value: [0.93939394 1. 0.96923077 0.95384615 0.98461538 0.96923077
|
|
0.92307692 0.95384615 0.93846154 0.98461538]
|
|
|
|
mean value: 0.9616317016317016
|
|
|
|
key: train_accuracy
|
|
value: [0.99829352 0.99146758 0.98977853 0.99318569 0.99318569 0.99488927
|
|
0.99488927 0.99318569 0.99148211 0.99148211]
|
|
|
|
mean value: 0.9931839456715759
|
|
|
|
key: test_fscore
|
|
value: [0.94117647 1. 0.96969697 0.95522388 0.98507463 0.97058824
|
|
0.92307692 0.95384615 0.93548387 0.98412698]
|
|
|
|
mean value: 0.9618294115059812
|
|
|
|
key: train_fscore
|
|
value: [0.99829642 0.99153976 0.98986486 0.99322034 0.99322034 0.99490662
|
|
0.99490662 0.99324324 0.99153976 0.9915683 ]
|
|
|
|
mean value: 0.99323062743685
|
|
|
|
key: test_precision
|
|
value: [0.91428571 1. 0.96969697 0.94117647 0.97058824 0.94285714
|
|
0.90909091 0.93939394 0.96666667 1. ]
|
|
|
|
mean value: 0.9553756047873695
|
|
|
|
key: train_precision
|
|
value: [0.99659864 0.98322148 0.97993311 0.98653199 0.98653199 0.98986486
|
|
0.99322034 0.98657718 0.98653199 0.98327759]
|
|
|
|
mean value: 0.9872289162958916
|
|
|
|
key: test_recall
|
|
value: [0.96969697 1. 0.96969697 0.96969697 1. 1.
|
|
0.9375 0.96875 0.90625 0.96875 ]
|
|
|
|
mean value: 0.9690340909090909
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.99659864 1. 0.99659864 1. ]
|
|
|
|
mean value: 0.9993197278911564
|
|
|
|
key: test_roc_auc
|
|
value: [0.93939394 1. 0.96922348 0.95359848 0.984375 0.96875
|
|
0.92329545 0.95407197 0.93797348 0.984375 ]
|
|
|
|
mean value: 0.9615056818181819
|
|
|
|
key: train_roc_auc
|
|
value: [0.99829352 0.99146758 0.98979592 0.99319728 0.99319728 0.99489796
|
|
0.99488635 0.99317406 0.99147338 0.99146758]
|
|
|
|
mean value: 0.9931850897355529
|
|
|
|
key: test_jcc
|
|
value: [0.88888889 1. 0.94117647 0.91428571 0.97058824 0.94285714
|
|
0.85714286 0.91176471 0.87878788 0.96875 ]
|
|
|
|
mean value: 0.9274241893727188
|
|
|
|
key: train_jcc
|
|
value: [0.99659864 0.98322148 0.97993311 0.98653199 0.98653199 0.98986486
|
|
0.98986486 0.98657718 0.98322148 0.98327759]
|
|
|
|
mean value: 0.986562317881881
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03119612 0.04447436 0.03461266 0.03525758 0.03817344 0.03466368
|
|
0.0472641 0.03969932 0.04068828 0.04268384]
|
|
|
|
mean value: 0.03887133598327637
|
|
|
|
key: score_time
|
|
value: [0.03358865 0.0293026 0.01909995 0.02525187 0.02937794 0.01904321
|
|
0.01923108 0.01922727 0.01910567 0.01917934]
|
|
|
|
mean value: 0.02324075698852539
|
|
|
|
key: test_mcc
|
|
value: [1. 0.88531564 0.87844611 0.90814394 0.91144345 0.93844697
|
|
0.90805728 0.90814394 0.94017476 0.96966868]
|
|
|
|
mean value: 0.9247840769518721
|
|
|
|
key: train_mcc
|
|
value: [0.95230718 0.9761149 0.96938669 0.96266197 0.96266197 0.95575756
|
|
0.95238704 0.95913582 0.95920216 0.95575602]
|
|
|
|
mean value: 0.9605371289558845
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.93939394 0.93846154 0.95384615 0.95384615 0.96923077
|
|
0.95384615 0.95384615 0.96923077 0.98461538]
|
|
|
|
mean value: 0.9616317016317016
|
|
|
|
key: train_accuracy
|
|
value: [0.97610922 0.98805461 0.9846678 0.98126065 0.98126065 0.97785349
|
|
0.97614991 0.97955707 0.97955707 0.97785349]
|
|
|
|
mean value: 0.9802323958811798
|
|
|
|
key: test_fscore
|
|
value: [1. 0.94285714 0.94117647 0.95384615 0.95652174 0.96969697
|
|
0.95238095 0.95384615 0.96774194 0.98412698]
|
|
|
|
mean value: 0.9622194501956898
|
|
|
|
key: train_fscore
|
|
value: [0.97627119 0.98807496 0.98471986 0.98138748 0.98138748 0.97792869
|
|
0.97635135 0.97966102 0.97972973 0.97800338]
|
|
|
|
mean value: 0.9803515140551106
|
|
|
|
key: test_precision
|
|
value: [1. 0.89189189 0.91428571 0.96875 0.91666667 0.96969697
|
|
0.96774194 0.93939394 1. 1. ]
|
|
|
|
mean value: 0.9568427117419053
|
|
|
|
key: train_precision
|
|
value: [0.96969697 0.98639456 0.97972973 0.97315436 0.97315436 0.97297297
|
|
0.96979866 0.97635135 0.97315436 0.97306397]
|
|
|
|
mean value: 0.9747471299604569
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.96969697 0.93939394 1. 0.96969697
|
|
0.9375 0.96875 0.9375 0.96875 ]
|
|
|
|
mean value: 0.9691287878787879
|
|
|
|
key: train_recall
|
|
value: [0.98293515 0.98976109 0.98976109 0.98976109 0.98976109 0.98293515
|
|
0.9829932 0.9829932 0.98639456 0.9829932 ]
|
|
|
|
mean value: 0.9860288825427782
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.93939394 0.93797348 0.95407197 0.953125 0.96922348
|
|
0.95359848 0.95407197 0.96875 0.984375 ]
|
|
|
|
mean value: 0.9614583333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.97610922 0.98805461 0.98467646 0.9812751 0.9812751 0.97786213
|
|
0.97613824 0.97955121 0.9795454 0.97784472]
|
|
|
|
mean value: 0.9802332195676906
|
|
|
|
key: test_jcc
|
|
value: [1. 0.89189189 0.88888889 0.91176471 0.91666667 0.94117647
|
|
0.90909091 0.91176471 0.9375 0.96875 ]
|
|
|
|
mean value: 0.9277494238891297
|
|
|
|
key: train_jcc
|
|
value: [0.95364238 0.97643098 0.96989967 0.96345515 0.96345515 0.95681063
|
|
0.95379538 0.96013289 0.9602649 0.95695364]
|
|
|
|
mean value: 0.9614840769271095
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.25791717 0.30864501 0.47040224 0.33794689 0.33028293 0.31463742
|
|
0.33145475 0.31620097 0.350003 0.41688848]
|
|
|
|
mean value: 0.3434378862380981
|
|
|
|
key: score_time
|
|
value: [0.01945615 0.02283406 0.01909637 0.01916385 0.0190537 0.01913309
|
|
0.0194664 0.01907849 0.01902366 0.0190413 ]
|
|
|
|
mean value: 0.019534707069396973
|
|
|
|
key: test_mcc
|
|
value: [1. 0.88531564 0.87844611 0.90814394 0.91144345 0.93844697
|
|
0.90805728 0.87867338 0.94017476 0.96966868]
|
|
|
|
mean value: 0.9218370210629971
|
|
|
|
key: train_mcc
|
|
value: [0.95230718 0.97952218 0.96938669 0.96266197 0.96266197 0.96934132
|
|
0.95238704 0.96595038 0.95920216 0.95575602]
|
|
|
|
mean value: 0.9629176905830358
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.93939394 0.93846154 0.95384615 0.95384615 0.96923077
|
|
0.95384615 0.93846154 0.96923077 0.98461538]
|
|
|
|
mean value: 0.9600932400932402
|
|
|
|
key: train_accuracy
|
|
value: [0.97610922 0.98976109 0.9846678 0.98126065 0.98126065 0.9846678
|
|
0.97614991 0.98296422 0.97955707 0.97785349]
|
|
|
|
mean value: 0.9814251908530097
|
|
|
|
key: test_fscore
|
|
value: [1. 0.94285714 0.94117647 0.95384615 0.95652174 0.96969697
|
|
0.95238095 0.93939394 0.96774194 0.98412698]
|
|
|
|
mean value: 0.9607742287504684
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./embb_sl.py:128: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_sl.py:131: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.97627119 0.98976109 0.98471986 0.98138748 0.98138748 0.9846678
|
|
0.97635135 0.98305085 0.97972973 0.97800338]
|
|
|
|
mean value: 0.9815330215484706
|
|
|
|
key: test_precision
|
|
value: [1. 0.89189189 0.91428571 0.96875 0.91666667 0.96969697
|
|
0.96774194 0.91176471 1. 1. ]
|
|
|
|
mean value: 0.9540797883907466
|
|
|
|
key: train_precision
|
|
value: [0.96969697 0.98976109 0.97972973 0.97315436 0.97315436 0.9829932
|
|
0.96979866 0.97972973 0.97315436 0.97306397]
|
|
|
|
mean value: 0.9764236436615927
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.96969697 0.93939394 1. 0.96969697
|
|
0.9375 0.96875 0.9375 0.96875 ]
|
|
|
|
mean value: 0.9691287878787879
|
|
|
|
key: train_recall
|
|
value: [0.98293515 0.98976109 0.98976109 0.98976109 0.98976109 0.98634812
|
|
0.9829932 0.98639456 0.98639456 0.9829932 ]
|
|
|
|
mean value: 0.9867103155255276
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.93939394 0.93797348 0.95407197 0.953125 0.96922348
|
|
0.95359848 0.93892045 0.96875 0.984375 ]
|
|
|
|
mean value: 0.9599431818181818
|
|
|
|
key: train_roc_auc
|
|
value: [0.97610922 0.98976109 0.98467646 0.9812751 0.9812751 0.98467066
|
|
0.97613824 0.98295837 0.9795454 0.97784472]
|
|
|
|
mean value: 0.9814254370690256
|
|
|
|
key: test_jcc
|
|
value: [1. 0.89189189 0.88888889 0.91176471 0.91666667 0.94117647
|
|
0.90909091 0.88571429 0.9375 0.96875 ]
|
|
|
|
mean value: 0.9251443818723231
|
|
|
|
key: train_jcc
|
|
value: [0.95364238 0.97972973 0.96989967 0.96345515 0.96345515 0.96979866
|
|
0.95379538 0.96666667 0.9602649 0.95695364]
|
|
|
|
mean value: 0.9637661325359951
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03635168 0.04008698 0.03932405 0.0380702 0.03882837 0.03954506
|
|
0.04933882 0.04907322 0.03866506 0.05903292]
|
|
|
|
mean value: 0.04283163547515869
|
|
|
|
key: score_time
|
|
value: [0.01420403 0.01426244 0.01425099 0.01448369 0.01443338 0.0146153
|
|
0.01465511 0.01483965 0.01471305 0.02132845]
|
|
|
|
mean value: 0.015178608894348144
|
|
|
|
key: test_mcc
|
|
value: [0.90950859 0.85201287 0.84644588 0.87867338 0.91144345 0.84659091
|
|
0.91168461 0.84659091 0.78763191 0.93844697]
|
|
|
|
mean value: 0.8729029478333615
|
|
|
|
key: train_mcc
|
|
value: [0.92569136 0.94894364 0.94583929 0.92504237 0.92889005 0.91487015
|
|
0.93886511 0.92506472 0.92512656 0.92189428]
|
|
|
|
mean value: 0.9300227538742016
|
|
|
|
key: test_accuracy
|
|
value: [0.95454545 0.92424242 0.92307692 0.93846154 0.95384615 0.92307692
|
|
0.95384615 0.92307692 0.89230769 0.96923077]
|
|
|
|
mean value: 0.9355710955710956
|
|
|
|
key: train_accuracy
|
|
value: [0.96245734 0.97440273 0.97274276 0.96252129 0.96422487 0.95741056
|
|
0.9693356 0.96252129 0.96252129 0.96081772]
|
|
|
|
mean value: 0.9648955468600101
|
|
|
|
key: test_fscore
|
|
value: [0.95522388 0.92753623 0.92537313 0.9375 0.95652174 0.92307692
|
|
0.95522388 0.92307692 0.8852459 0.96875 ]
|
|
|
|
mean value: 0.9357528614330072
|
|
|
|
key: train_fscore
|
|
value: [0.9632107 0.97461929 0.97306397 0.96245734 0.96470588 0.95755518
|
|
0.96969697 0.96245734 0.96283784 0.96134454]
|
|
|
|
mean value: 0.9651949046484256
|
|
|
|
key: test_precision
|
|
value: [0.94117647 0.88888889 0.91176471 0.96774194 0.91666667 0.9375
|
|
0.91428571 0.90909091 0.93103448 0.96875 ]
|
|
|
|
mean value: 0.9286899773645259
|
|
|
|
key: train_precision
|
|
value: [0.9442623 0.96644295 0.96013289 0.96245734 0.95033113 0.9527027
|
|
0.96 0.96575342 0.95637584 0.95016611]
|
|
|
|
mean value: 0.9568624681422546
|
|
|
|
key: test_recall
|
|
value: [0.96969697 0.96969697 0.93939394 0.90909091 1. 0.90909091
|
|
1. 0.9375 0.84375 0.96875 ]
|
|
|
|
mean value: 0.9446969696969697
|
|
|
|
key: train_recall
|
|
value: [0.98293515 0.98293515 0.98634812 0.96245734 0.97952218 0.96245734
|
|
0.97959184 0.95918367 0.96938776 0.97278912]
|
|
|
|
mean value: 0.973760767105477
|
|
|
|
key: test_roc_auc
|
|
value: [0.95454545 0.92424242 0.92282197 0.93892045 0.953125 0.92329545
|
|
0.95454545 0.92329545 0.89157197 0.96922348]
|
|
|
|
mean value: 0.9355587121212121
|
|
|
|
key: train_roc_auc
|
|
value: [0.96245734 0.97440273 0.9727659 0.96252119 0.96425089 0.95741915
|
|
0.9693181 0.96252699 0.96250958 0.96079729]
|
|
|
|
mean value: 0.9648969143971582
|
|
|
|
key: test_jcc
|
|
value: [0.91428571 0.86486486 0.86111111 0.88235294 0.91666667 0.85714286
|
|
0.91428571 0.85714286 0.79411765 0.93939394]
|
|
|
|
mean value: 0.8801364313129019
|
|
|
|
key: train_jcc
|
|
value: [0.92903226 0.95049505 0.94754098 0.92763158 0.93181818 0.91856678
|
|
0.94117647 0.92763158 0.92833876 0.92556634]
|
|
|
|
mean value: 0.9327797981978533
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.05838704 0.86052942 1.04595804 1.11087394 1.15152407 1.06360507
|
|
1.18267727 1.06228065 1.11125398 1.15308499]
|
|
|
|
mean value: 1.0800174474716187
|
|
|
|
key: score_time
|
|
value: [0.01456332 0.01477814 0.01478934 0.01489806 0.01752996 0.01242924
|
|
0.01497817 0.01905251 0.01568937 0.0210464 ]
|
|
|
|
mean value: 0.015975451469421385
|
|
|
|
key: test_mcc
|
|
value: [0.90950859 0.85201287 0.94017476 0.90805728 0.94017476 0.96966868
|
|
0.96969697 0.88382395 0.93844697 0.96969697]
|
|
|
|
mean value: 0.9281261797504913
|
|
|
|
key: train_mcc
|
|
value: [0.9761149 0.9863711 1. 0.96592835 0.96934132 0.97274268
|
|
0.96934096 0.97274268 0.97276495 0.97274268]
|
|
|
|
mean value: 0.9758089630087908
|
|
|
|
key: test_accuracy
|
|
value: [0.95454545 0.92424242 0.96923077 0.95384615 0.96923077 0.98461538
|
|
0.98461538 0.93846154 0.96923077 0.98461538]
|
|
|
|
mean value: 0.9632634032634033
|
|
|
|
key: train_accuracy
|
|
value: [0.98805461 0.99317406 1. 0.98296422 0.9846678 0.98637138
|
|
0.9846678 0.98637138 0.98637138 0.98637138]
|
|
|
|
mean value: 0.9879014018175369
|
|
|
|
key: test_fscore
|
|
value: [0.95522388 0.92753623 0.97058824 0.95522388 0.97058824 0.98507463
|
|
0.98461538 0.94117647 0.96875 0.98461538]
|
|
|
|
mean value: 0.9643392330350999
|
|
|
|
key: train_fscore
|
|
value: [0.98807496 0.99315068 1. 0.98293515 0.9846678 0.98634812
|
|
0.98471986 0.98639456 0.98644068 0.98639456]
|
|
|
|
mean value: 0.987912637896652
|
|
|
|
key: test_precision
|
|
value: [0.94117647 0.88888889 0.94285714 0.94117647 0.94285714 0.97058824
|
|
0.96969697 0.88888889 0.96875 0.96969697]
|
|
|
|
mean value: 0.9424577179356591
|
|
|
|
key: train_precision
|
|
value: [0.98639456 0.99656357 1. 0.98293515 0.9829932 0.98634812
|
|
0.98305085 0.98639456 0.98310811 0.98639456]
|
|
|
|
mean value: 0.9874182676647708
|
|
|
|
key: test_recall
|
|
value: [0.96969697 0.96969697 1. 0.96969697 1. 1.
|
|
1. 1. 0.96875 1. ]
|
|
|
|
mean value: 0.9877840909090909
|
|
|
|
key: train_recall
|
|
value: [0.98976109 0.98976109 1. 0.98293515 0.98634812 0.98634812
|
|
0.98639456 0.98639456 0.98979592 0.98639456]
|
|
|
|
mean value: 0.9884133175454481
|
|
|
|
key: test_roc_auc
|
|
value: [0.95454545 0.92424242 0.96875 0.95359848 0.96875 0.984375
|
|
0.98484848 0.93939394 0.96922348 0.98484848]
|
|
|
|
mean value: 0.9632575757575758
|
|
|
|
key: train_roc_auc
|
|
value: [0.98805461 0.99317406 1. 0.98296418 0.98467066 0.98637134
|
|
0.98466486 0.98637134 0.98636554 0.98637134]
|
|
|
|
mean value: 0.9879007917160039
|
|
|
|
key: test_jcc
|
|
value: [0.91428571 0.86486486 0.94285714 0.91428571 0.94285714 0.97058824
|
|
0.96969697 0.88888889 0.93939394 0.96969697]
|
|
|
|
mean value: 0.9317415582121464
|
|
|
|
key: train_jcc
|
|
value: [0.97643098 0.98639456 1. 0.96644295 0.96979866 0.97306397
|
|
0.96989967 0.97315436 0.97324415 0.97315436]
|
|
|
|
mean value: 0.9761583655597579
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01527762 0.01133418 0.01078105 0.01043081 0.01038504 0.01038885
|
|
0.01011038 0.01063275 0.01037979 0.0102396 ]
|
|
|
|
mean value: 0.010996007919311523
|
|
|
|
key: score_time
|
|
value: [0.01235747 0.00942874 0.00928879 0.00893927 0.00882173 0.00888205
|
|
0.0088284 0.00891542 0.00892353 0.00884128]
|
|
|
|
mean value: 0.009322667121887207
|
|
|
|
key: test_mcc
|
|
value: [0.48507125 0.62017367 0.63620086 0.63068182 0.75498882 0.57061637
|
|
0.67840053 0.63153153 0.84659091 0.60621087]
|
|
|
|
mean value: 0.646046662624106
|
|
|
|
key: train_mcc
|
|
value: [0.68269327 0.72405993 0.70759582 0.6797265 0.67306421 0.67743268
|
|
0.69679344 0.69335516 0.67985743 0.64233052]
|
|
|
|
mean value: 0.6856908963219008
|
|
|
|
key: test_accuracy
|
|
value: [0.74242424 0.8030303 0.81538462 0.81538462 0.87692308 0.78461538
|
|
0.83076923 0.81538462 0.92307692 0.8 ]
|
|
|
|
mean value: 0.8206993006993007
|
|
|
|
key: train_accuracy
|
|
value: [0.84129693 0.86177474 0.85349233 0.83986371 0.83645656 0.83475298
|
|
0.8483816 0.84667802 0.83986371 0.82112436]
|
|
|
|
mean value: 0.8423684960259549
|
|
|
|
key: test_fscore
|
|
value: [0.73846154 0.82191781 0.80645161 0.81818182 0.88235294 0.78125
|
|
0.84507042 0.80645161 0.92307692 0.77966102]
|
|
|
|
mean value: 0.8202875694406744
|
|
|
|
key: train_fscore
|
|
value: [0.83993115 0.86432161 0.85618729 0.83959044 0.83783784 0.8207024
|
|
0.84940778 0.84693878 0.84175084 0.82293423]
|
|
|
|
mean value: 0.8419602370069587
|
|
|
|
key: test_precision
|
|
value: [0.75 0.75 0.86206897 0.81818182 0.85714286 0.80645161
|
|
0.76923077 0.83333333 0.90909091 0.85185185]
|
|
|
|
mean value: 0.8207352117252006
|
|
|
|
key: train_precision
|
|
value: [0.84722222 0.84868421 0.83934426 0.83959044 0.82943144 0.89516129
|
|
0.84511785 0.84693878 0.83333333 0.81605351]
|
|
|
|
mean value: 0.8440877332846366
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.90909091 0.75757576 0.81818182 0.90909091 0.75757576
|
|
0.9375 0.78125 0.9375 0.71875 ]
|
|
|
|
mean value: 0.8253787878787879
|
|
|
|
key: train_recall
|
|
value: [0.83276451 0.88054608 0.87372014 0.83959044 0.84641638 0.75767918
|
|
0.8537415 0.84693878 0.85034014 0.82993197]
|
|
|
|
mean value: 0.8411669104501869
|
|
|
|
key: test_roc_auc
|
|
value: [0.74242424 0.8030303 0.81628788 0.81534091 0.87642045 0.78503788
|
|
0.83238636 0.81486742 0.92329545 0.79876894]
|
|
|
|
mean value: 0.8207859848484849
|
|
|
|
key: train_roc_auc
|
|
value: [0.84129693 0.86177474 0.85352673 0.83986325 0.8364735 0.8346219
|
|
0.84837245 0.84667758 0.83984584 0.82110933]
|
|
|
|
mean value: 0.8423562257667572
|
|
|
|
key: test_jcc
|
|
value: [0.58536585 0.69767442 0.67567568 0.69230769 0.78947368 0.64102564
|
|
0.73170732 0.67567568 0.85714286 0.63888889]
|
|
|
|
mean value: 0.6984937704263315
|
|
|
|
key: train_jcc
|
|
value: [0.72403561 0.76106195 0.74853801 0.72352941 0.72093023 0.69592476
|
|
0.73823529 0.73451327 0.72674419 0.6991404 ]
|
|
|
|
mean value: 0.7272653131766867
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01057482 0.01148009 0.01069951 0.01047349 0.01045203 0.01067877
|
|
0.0105927 0.01061225 0.01050448 0.01047277]
|
|
|
|
mean value: 0.010654091835021973
|
|
|
|
key: score_time
|
|
value: [0.00899243 0.00903797 0.008986 0.00885749 0.00882101 0.00897503
|
|
0.00889325 0.00890899 0.00888324 0.0088706 ]
|
|
|
|
mean value: 0.008922600746154785
|
|
|
|
key: test_mcc
|
|
value: [0.42919754 0.53099079 0.58027158 0.47727273 0.72348485 0.54131274
|
|
0.63068182 0.63153153 0.64942422 0.47810304]
|
|
|
|
mean value: 0.5672270829566963
|
|
|
|
key: train_mcc
|
|
value: [0.58200069 0.60001567 0.58515758 0.56541828 0.58185441 0.60844172
|
|
0.56880028 0.60266953 0.58602719 0.60241221]
|
|
|
|
mean value: 0.5882797582957422
|
|
|
|
key: test_accuracy
|
|
value: [0.71212121 0.75757576 0.78461538 0.73846154 0.86153846 0.76923077
|
|
0.81538462 0.81538462 0.81538462 0.73846154]
|
|
|
|
mean value: 0.7808158508158508
|
|
|
|
key: train_accuracy
|
|
value: [0.79010239 0.79863481 0.79216354 0.78194208 0.79045997 0.80408859
|
|
0.78364566 0.80068143 0.79216354 0.80068143]
|
|
|
|
mean value: 0.7934563436458885
|
|
|
|
key: test_fscore
|
|
value: [0.68852459 0.78378378 0.76666667 0.73846154 0.86153846 0.76190476
|
|
0.8125 0.80645161 0.78571429 0.72131148]
|
|
|
|
mean value: 0.7726857176546494
|
|
|
|
key: train_fscore
|
|
value: [0.78152753 0.78853047 0.78596491 0.77304965 0.78383128 0.80069324
|
|
0.77601411 0.7943761 0.7844523 0.79509632]
|
|
|
|
mean value: 0.7863535905385026
|
|
|
|
key: test_precision
|
|
value: [0.75 0.70731707 0.85185185 0.75 0.875 0.8
|
|
0.8125 0.83333333 0.91666667 0.75862069]
|
|
|
|
mean value: 0.8055289614677756
|
|
|
|
key: train_precision
|
|
value: [0.81481481 0.83018868 0.80866426 0.80442804 0.80797101 0.81338028
|
|
0.80586081 0.82181818 0.81617647 0.81949458]
|
|
|
|
mean value: 0.8142797137556002
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.87878788 0.6969697 0.72727273 0.84848485 0.72727273
|
|
0.8125 0.78125 0.6875 0.6875 ]
|
|
|
|
mean value: 0.7483901515151515
|
|
|
|
key: train_recall
|
|
value: [0.75085324 0.75085324 0.76450512 0.7440273 0.76109215 0.7883959
|
|
0.74829932 0.76870748 0.75510204 0.77210884]
|
|
|
|
mean value: 0.7603944649532167
|
|
|
|
key: test_roc_auc
|
|
value: [0.71212121 0.75757576 0.78598485 0.73863636 0.86174242 0.76988636
|
|
0.81534091 0.81486742 0.81344697 0.73768939]
|
|
|
|
mean value: 0.7807291666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.79010239 0.79863481 0.79211651 0.7818776 0.79041002 0.8040619
|
|
0.78370597 0.80073599 0.79222679 0.80073019]
|
|
|
|
mean value: 0.7934602168512456
|
|
|
|
key: test_jcc
|
|
value: [0.525 0.64444444 0.62162162 0.58536585 0.75675676 0.61538462
|
|
0.68421053 0.67567568 0.64705882 0.56410256]
|
|
|
|
mean value: 0.6319620881489416
|
|
|
|
key: train_jcc
|
|
value: [0.64139942 0.65088757 0.64739884 0.6300578 0.64450867 0.66763006
|
|
0.63400576 0.65889213 0.64534884 0.65988372]
|
|
|
|
mean value: 0.6480012816704841
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00982594 0.01073241 0.00973868 0.01103973 0.01097131 0.01004457
|
|
0.01003647 0.01040053 0.01030421 0.01033473]
|
|
|
|
mean value: 0.010342860221862793
|
|
|
|
key: score_time
|
|
value: [0.01514554 0.01278162 0.01276278 0.01355863 0.01304555 0.01275563
|
|
0.01325583 0.01311755 0.01306009 0.01343203]
|
|
|
|
mean value: 0.013291525840759277
|
|
|
|
key: test_mcc
|
|
value: [0.45538256 0.68219104 0.44739357 0.64071161 0.45812857 0.48131798
|
|
0.58873983 0.70516447 0.57061637 0.53838887]
|
|
|
|
mean value: 0.5568034847168876
|
|
|
|
key: train_mcc
|
|
value: [0.77368438 0.78627716 0.77867359 0.76093067 0.77108643 0.78072643
|
|
0.80246789 0.7985738 0.75589317 0.78313619]
|
|
|
|
mean value: 0.7791449713062735
|
|
|
|
key: test_accuracy
|
|
value: [0.72727273 0.83333333 0.72307692 0.81538462 0.72307692 0.73846154
|
|
0.78461538 0.84615385 0.78461538 0.76923077]
|
|
|
|
mean value: 0.7745221445221445
|
|
|
|
key: train_accuracy
|
|
value: [0.88225256 0.88737201 0.88586031 0.87393526 0.88074957 0.88415673
|
|
0.89608177 0.89437819 0.87223169 0.88756388]
|
|
|
|
mean value: 0.884458198394102
|
|
|
|
key: test_fscore
|
|
value: [0.73529412 0.84931507 0.71875 0.83333333 0.75675676 0.76056338
|
|
0.80555556 0.85714286 0.78787879 0.76190476]
|
|
|
|
mean value: 0.7866494618993952
|
|
|
|
key: train_fscore
|
|
value: [0.89064976 0.89622642 0.8928 0.884375 0.88924051 0.89341693
|
|
0.90393701 0.9022082 0.88262911 0.8952381 ]
|
|
|
|
mean value: 0.8930721024591308
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.775 0.74193548 0.76923077 0.68292683 0.71052632
|
|
0.725 0.78947368 0.76470588 0.77419355]
|
|
|
|
mean value: 0.7447278227395782
|
|
|
|
key: train_precision
|
|
value: [0.83136095 0.83090379 0.84036145 0.81556196 0.82890855 0.82608696
|
|
0.84164223 0.84117647 0.8173913 0.83928571]
|
|
|
|
mean value: 0.8312679371325126
|
|
|
|
key: test_recall
|
|
value: [0.75757576 0.93939394 0.6969697 0.90909091 0.84848485 0.81818182
|
|
0.90625 0.9375 0.8125 0.75 ]
|
|
|
|
mean value: 0.837594696969697
|
|
|
|
key: train_recall
|
|
value: [0.95904437 0.97269625 0.95221843 0.96587031 0.95904437 0.97269625
|
|
0.97619048 0.97278912 0.95918367 0.95918367]
|
|
|
|
mean value: 0.9648916904645817
|
|
|
|
key: test_roc_auc
|
|
value: [0.72727273 0.83333333 0.72348485 0.81392045 0.72111742 0.73721591
|
|
0.78645833 0.84753788 0.78503788 0.76893939]
|
|
|
|
mean value: 0.7744318181818182
|
|
|
|
key: train_roc_auc
|
|
value: [0.88225256 0.88737201 0.88597316 0.87409162 0.88088273 0.88430731
|
|
0.89594507 0.89424439 0.8720833 0.88744167]
|
|
|
|
mean value: 0.8844593810220334
|
|
|
|
key: test_jcc
|
|
value: [0.58139535 0.73809524 0.56097561 0.71428571 0.60869565 0.61363636
|
|
0.6744186 0.75 0.65 0.61538462]
|
|
|
|
mean value: 0.6506887146820315
|
|
|
|
key: train_jcc
|
|
value: [0.80285714 0.81196581 0.80635838 0.79271709 0.8005698 0.80736544
|
|
0.82471264 0.82183908 0.78991597 0.81034483]
|
|
|
|
mean value: 0.8068646180934557
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03434801 0.03376579 0.03416848 0.03222466 0.03244758 0.03181934
|
|
0.03270769 0.0325191 0.03169084 0.03116226]
|
|
|
|
mean value: 0.032685375213623045
|
|
|
|
key: score_time
|
|
value: [0.0160203 0.01425171 0.0150001 0.01322293 0.01301026 0.01341105
|
|
0.01340508 0.01416564 0.01349974 0.01314807]
|
|
|
|
mean value: 0.013913488388061524
|
|
|
|
key: test_mcc
|
|
value: [0.72760688 0.79708114 0.69223485 0.72649867 0.7935502 0.51508188
|
|
0.68964536 0.69810664 0.72322307 0.64071161]
|
|
|
|
mean value: 0.7003740298360195
|
|
|
|
key: train_mcc
|
|
value: [0.79968344 0.8805512 0.81608462 0.80591418 0.84344123 0.83013357
|
|
0.83327919 0.81260956 0.81945993 0.80637046]
|
|
|
|
mean value: 0.8247527376466051
|
|
|
|
key: test_accuracy
|
|
value: [0.86363636 0.89393939 0.84615385 0.86153846 0.89230769 0.75384615
|
|
0.83076923 0.84615385 0.86153846 0.81538462]
|
|
|
|
mean value: 0.8465268065268066
|
|
|
|
key: train_accuracy
|
|
value: [0.89931741 0.94027304 0.90800681 0.90289608 0.92163543 0.91482112
|
|
0.9165247 0.90630324 0.90971039 0.90289608]
|
|
|
|
mean value: 0.9122384310806961
|
|
|
|
key: test_fscore
|
|
value: [0.86153846 0.90140845 0.84848485 0.85714286 0.90140845 0.77777778
|
|
0.84931507 0.85294118 0.85714286 0.79310345]
|
|
|
|
mean value: 0.8500263396734854
|
|
|
|
key: train_fscore
|
|
value: [0.90183028 0.94017094 0.90721649 0.9035533 0.92068966 0.91610738
|
|
0.91764706 0.90662139 0.91032149 0.90121317]
|
|
|
|
mean value: 0.9125371166685831
|
|
|
|
key: test_precision
|
|
value: [0.875 0.84210526 0.84848485 0.9 0.84210526 0.71794872
|
|
0.75609756 0.80555556 0.87096774 0.88461538]
|
|
|
|
mean value: 0.834288033583139
|
|
|
|
key: train_precision
|
|
value: [0.87987013 0.94178082 0.91349481 0.89597315 0.93031359 0.9009901
|
|
0.90697674 0.90508475 0.90572391 0.91872792]
|
|
|
|
mean value: 0.9098935914566021
|
|
|
|
key: test_recall
|
|
value: [0.84848485 0.96969697 0.84848485 0.81818182 0.96969697 0.84848485
|
|
0.96875 0.90625 0.84375 0.71875 ]
|
|
|
|
mean value: 0.8740530303030303
|
|
|
|
key: train_recall
|
|
value: [0.92491468 0.93856655 0.90102389 0.9112628 0.9112628 0.93174061
|
|
0.92857143 0.90816327 0.91496599 0.88435374]
|
|
|
|
mean value: 0.9154825752826727
|
|
|
|
key: test_roc_auc
|
|
value: [0.86363636 0.89393939 0.84611742 0.86221591 0.89109848 0.75236742
|
|
0.83285985 0.84706439 0.86126894 0.81392045]
|
|
|
|
mean value: 0.8464488636363636
|
|
|
|
key: train_roc_auc
|
|
value: [0.89931741 0.94027304 0.90799494 0.90291031 0.92161779 0.9148499
|
|
0.91650414 0.90630006 0.90970142 0.90292772]
|
|
|
|
mean value: 0.9122396740266072
|
|
|
|
key: test_jcc
|
|
value: [0.75675676 0.82051282 0.73684211 0.75 0.82051282 0.63636364
|
|
0.73809524 0.74358974 0.75 0.65714286]
|
|
|
|
mean value: 0.7409815978237031
|
|
|
|
key: train_jcc
|
|
value: [0.82121212 0.88709677 0.83018868 0.82407407 0.85303514 0.84520124
|
|
0.84782609 0.82919255 0.83540373 0.82018927]
|
|
|
|
mean value: 0.8393419665581484
|
|
|
|
MCC on Blind test: 0.85
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.16521049 2.1405015 2.16510344 2.0510726 2.08916759 2.11430049
|
|
2.20137405 2.13904119 2.11048341 2.28039932]
|
|
|
|
mean value: 2.145665407180786
|
|
|
|
key: score_time
|
|
value: [0.01282048 0.01445389 0.02410007 0.01496387 0.01454306 0.01344037
|
|
0.01483035 0.01479912 0.01515126 0.0157845 ]
|
|
|
|
mean value: 0.015488696098327637
|
|
|
|
key: test_mcc
|
|
value: [0.9701425 0.88531564 0.91144345 0.96966868 0.94017476 0.91144345
|
|
0.88382395 0.91168461 0.91168461 1. ]
|
|
|
|
mean value: 0.9295381660328197
|
|
|
|
key: train_mcc
|
|
value: [0.99659284 0.99659284 1. 0.99659864 0.99659864 0.99659864
|
|
0.9965986 0.9965986 0.9965986 0.9965986 ]
|
|
|
|
mean value: 0.996937598865393
|
|
|
|
key: test_accuracy
|
|
value: [0.98484848 0.93939394 0.95384615 0.98461538 0.96923077 0.95384615
|
|
0.93846154 0.95384615 0.95384615 1. ]
|
|
|
|
mean value: 0.9631934731934733
|
|
|
|
key: train_accuracy
|
|
value: [0.99829352 0.99829352 1. 0.99829642 0.99829642 0.99829642
|
|
0.99829642 0.99829642 0.99829642 0.99829642]
|
|
|
|
mean value: 0.9984661988127286
|
|
|
|
key: test_fscore
|
|
value: [0.98507463 0.94285714 0.95652174 0.98507463 0.97058824 0.95652174
|
|
0.94117647 0.95522388 0.95522388 1. ]
|
|
|
|
mean value: 0.9648262341925739
|
|
|
|
key: train_fscore
|
|
value: [0.99829642 0.99829642 1. 0.99829642 0.99829642 0.99829642
|
|
0.99830221 0.99830221 0.99830221 0.99830221]
|
|
|
|
mean value: 0.9984690940959036
|
|
|
|
key: test_precision
|
|
value: [0.97058824 0.89189189 0.91666667 0.97058824 0.94285714 0.91666667
|
|
0.88888889 0.91428571 0.91428571 1. ]
|
|
|
|
mean value: 0.932671915613092
|
|
|
|
key: train_precision
|
|
value: [0.99659864 0.99659864 1. 0.99659864 0.99659864 0.99659864
|
|
0.99661017 0.99661017 0.99661017 0.99661017]
|
|
|
|
mean value: 0.9969433875245013
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98484848 0.93939394 0.953125 0.984375 0.96875 0.953125
|
|
0.93939394 0.95454545 0.95454545 1. ]
|
|
|
|
mean value: 0.9632102272727273
|
|
|
|
key: train_roc_auc
|
|
value: [0.99829352 0.99829352 1. 0.99829932 0.99829932 0.99829932
|
|
0.99829352 0.99829352 0.99829352 0.99829352]
|
|
|
|
mean value: 0.9984659051333844
|
|
|
|
key: test_jcc
|
|
value: [0.97058824 0.89189189 0.91666667 0.97058824 0.94285714 0.91666667
|
|
0.88888889 0.91428571 0.91428571 1. ]
|
|
|
|
mean value: 0.932671915613092
|
|
|
|
key: train_jcc
|
|
value: [0.99659864 0.99659864 1. 0.99659864 0.99659864 0.99659864
|
|
0.99661017 0.99661017 0.99661017 0.99661017]
|
|
|
|
mean value: 0.9969433875245013
|
|
|
|
MCC on Blind test: 0.85
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04018974 0.0260396 0.02741075 0.02671456 0.0259552 0.0270834
|
|
0.02738142 0.02613425 0.02657294 0.02714205]
|
|
|
|
mean value: 0.02806239128112793
|
|
|
|
key: score_time
|
|
value: [0.01224422 0.00977659 0.00987625 0.00879931 0.00890756 0.00887895
|
|
0.009238 0.00953794 0.00898623 0.00918031]
|
|
|
|
mean value: 0.009542536735534669
|
|
|
|
key: test_mcc
|
|
value: [1. 0.88531564 0.94017476 0.96966868 0.96966868 0.94017476
|
|
1. 0.96969697 1. 0.96969697]
|
|
|
|
mean value: 0.9644396455835689
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.93939394 0.96923077 0.98461538 0.98461538 0.96923077
|
|
1. 0.98461538 1. 0.98461538]
|
|
|
|
mean value: 0.9816317016317017
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.94285714 0.97058824 0.98507463 0.98507463 0.97058824
|
|
1. 0.98461538 1. 0.98461538]
|
|
|
|
mean value: 0.9823413636407491
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.89189189 0.94285714 0.97058824 0.97058824 0.94285714
|
|
1. 0.96969697 1. 0.96969697]
|
|
|
|
mean value: 0.9658176587588352
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.93939394 0.96875 0.984375 0.984375 0.96875
|
|
1. 0.98484848 1. 0.98484848]
|
|
|
|
mean value: 0.9815340909090909
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.89189189 0.94285714 0.97058824 0.97058824 0.94285714
|
|
1. 0.96969697 1. 0.96969697]
|
|
|
|
mean value: 0.9658176587588352
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.13007593 0.12656021 0.12510514 0.12539434 0.12666392 0.1268909
|
|
0.12753057 0.12602592 0.12738371 0.12686872]
|
|
|
|
mean value: 0.12684993743896483
|
|
|
|
key: score_time
|
|
value: [0.01878166 0.01778889 0.01779699 0.01774406 0.01783943 0.01807499
|
|
0.01818442 0.01870155 0.01788616 0.02420855]
|
|
|
|
mean value: 0.018700671195983887
|
|
|
|
key: test_mcc
|
|
value: [1. 0.88531564 0.91144345 0.96966868 0.96966868 0.96966868
|
|
1. 0.96969697 0.96966868 0.96969697]
|
|
|
|
mean value: 0.9614827760603497
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.93939394 0.95384615 0.98461538 0.98461538 0.98461538
|
|
1. 0.98461538 0.98461538 0.98461538]
|
|
|
|
mean value: 0.9800932400932402
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.94285714 0.95652174 0.98507463 0.98507463 0.98507463
|
|
1. 0.98461538 0.98412698 0.98461538]
|
|
|
|
mean value: 0.9807960515942346
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.89189189 0.91666667 0.97058824 0.97058824 0.97058824
|
|
1. 0.96969697 1. 0.96969697]
|
|
|
|
mean value: 0.9659717203834851
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 0.96875
|
|
1. ]
|
|
|
|
mean value: 0.996875
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.93939394 0.953125 0.984375 0.984375 0.984375
|
|
1. 0.98484848 0.984375 0.98484848]
|
|
|
|
mean value: 0.9799715909090909
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.89189189 0.91666667 0.97058824 0.97058824 0.97058824
|
|
1. 0.96969697 0.96875 0.96969697]
|
|
|
|
mean value: 0.9628467203834851
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01043582 0.01042581 0.01088643 0.01115012 0.0105629 0.01053858
|
|
0.01073027 0.01055336 0.01113892 0.01059294]
|
|
|
|
mean value: 0.010701513290405274
|
|
|
|
key: score_time
|
|
value: [0.00881147 0.00887012 0.0090332 0.00886226 0.00879574 0.0087831
|
|
0.0088973 0.00887489 0.00885868 0.00885963]
|
|
|
|
mean value: 0.008864641189575195
|
|
|
|
key: test_mcc
|
|
value: [0.85839508 0.88531564 0.8291562 0.87689394 0.91144345 0.94017476
|
|
0.85663571 0.85663571 0.83005736 0.65648795]
|
|
|
|
mean value: 0.8501195782810751
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92424242 0.93939394 0.90769231 0.93846154 0.95384615 0.96923077
|
|
0.92307692 0.92307692 0.90769231 0.8 ]
|
|
|
|
mean value: 0.9186713286713287
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.92957746 0.94285714 0.91666667 0.93939394 0.95652174 0.97058824
|
|
0.92753623 0.92753623 0.91428571 0.83116883]
|
|
|
|
mean value: 0.9256132197353695
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.86842105 0.89189189 0.84615385 0.93939394 0.91666667 0.94285714
|
|
0.86486486 0.86486486 0.84210526 0.71111111]
|
|
|
|
mean value: 0.8688330643593801
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.93939394 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9939393939393939
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92424242 0.93939394 0.90625 0.93844697 0.953125 0.96875
|
|
0.92424242 0.92424242 0.90909091 0.8030303 ]
|
|
|
|
mean value: 0.9190814393939394
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.86842105 0.89189189 0.84615385 0.88571429 0.91666667 0.94285714
|
|
0.86486486 0.86486486 0.84210526 0.71111111]
|
|
|
|
mean value: 0.8634650989914148
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.83368301 1.80051327 1.82413101 1.82221413 1.8274672 1.83522439
|
|
1.82569861 1.82355714 1.82971764 1.85142326]
|
|
|
|
mean value: 1.8273629665374755
|
|
|
|
key: score_time
|
|
value: [0.09318542 0.0931797 0.09321451 0.09349918 0.09294653 0.09291863
|
|
0.09288955 0.09271193 0.09319663 0.09328294]
|
|
|
|
mean value: 0.09310250282287598
|
|
|
|
key: test_mcc
|
|
value: [1. 0.88531564 0.96966868 0.94017476 0.96966868 0.96966868
|
|
1. 0.94028478 1. 0.96969697]
|
|
|
|
mean value: 0.9644478194983122
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.93939394 0.98461538 0.96923077 0.98461538 0.98461538
|
|
1. 0.96923077 1. 0.98461538]
|
|
|
|
mean value: 0.9816317016317017
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.94285714 0.98507463 0.97058824 0.98507463 0.98507463
|
|
1. 0.96969697 1. 0.98461538]
|
|
|
|
mean value: 0.982298161306063
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.89189189 0.97058824 0.94285714 0.97058824 0.97058824
|
|
1. 0.94117647 1. 0.96969697]
|
|
|
|
mean value: 0.9657387180916592
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.93939394 0.984375 0.96875 0.984375 0.984375
|
|
1. 0.96969697 1. 0.98484848]
|
|
|
|
mean value: 0.9815814393939394
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.89189189 0.97058824 0.94285714 0.97058824 0.97058824
|
|
1. 0.94117647 1. 0.96969697]
|
|
|
|
mean value: 0.9657387180916592
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.82
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.96573257 0.98738766 1.00982213 1.02626348 1.00719547 1.04622245
|
|
1.01944923 0.99987769 1.07083035 1.11324215]
|
|
|
|
mean value: 1.0246023178100585
|
|
|
|
key: score_time
|
|
value: [0.23722124 0.24880981 0.24401736 0.19855857 0.25242662 0.22853804
|
|
0.16347671 0.2721715 0.25464773 0.13172317]
|
|
|
|
mean value: 0.2231590747833252
|
|
|
|
key: test_mcc
|
|
value: [1. 0.88531564 0.94017476 0.94017476 0.96966868 1.
|
|
0.96969697 0.94028478 1. 0.96969697]
|
|
|
|
mean value: 0.9615012556379743
|
|
|
|
key: train_mcc
|
|
value: [0.9763879 0.98981298 0.97976246 0.98310733 0.97976246 0.97642854
|
|
0.97976106 0.97642665 0.97642665 0.97976106]
|
|
|
|
mean value: 0.9797637092135919
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.93939394 0.96923077 0.96923077 0.98461538 1.
|
|
0.98461538 0.96923077 1. 0.98461538]
|
|
|
|
mean value: 0.9800932400932401
|
|
|
|
key: train_accuracy
|
|
value: [0.98805461 0.99488055 0.98977853 0.99148211 0.98977853 0.98807496
|
|
0.98977853 0.98807496 0.98807496 0.98977853]
|
|
|
|
mean value: 0.9897756277944776
|
|
|
|
key: test_fscore
|
|
value: [1. 0.94285714 0.97058824 0.97058824 0.98507463 1.
|
|
0.98461538 0.96969697 1. 0.98461538]
|
|
|
|
mean value: 0.9808035979238788
|
|
|
|
key: train_fscore
|
|
value: [0.98819562 0.99490662 0.98986486 0.99153976 0.98986486 0.98819562
|
|
0.98989899 0.98823529 0.98823529 0.98989899]
|
|
|
|
mean value: 0.9898835913297229
|
|
|
|
key: test_precision
|
|
value: [1. 0.89189189 0.94285714 0.94285714 0.97058824 1.
|
|
0.96969697 0.94117647 1. 0.96969697]
|
|
|
|
mean value: 0.962876482288247
|
|
|
|
key: train_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.97666667 0.98986486 0.97993311 0.98322148 0.97993311 0.97666667
|
|
0.98 0.97674419 0.97674419 0.98 ]
|
|
|
|
mean value: 0.9799774267537075
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.93939394 0.96875 0.96875 0.984375 1.
|
|
0.98484848 0.96969697 1. 0.98484848]
|
|
|
|
mean value: 0.9800662878787879
|
|
|
|
key: train_roc_auc
|
|
value: [0.98805461 0.99488055 0.98979592 0.9914966 0.98979592 0.98809524
|
|
0.98976109 0.98805461 0.98805461 0.98976109]
|
|
|
|
mean value: 0.9897750226370411
|
|
|
|
key: test_jcc
|
|
value: [1. 0.89189189 0.94285714 0.94285714 0.97058824 1.
|
|
0.96969697 0.94117647 1. 0.96969697]
|
|
|
|
mean value: 0.962876482288247
|
|
|
|
key: train_jcc
|
|
value: [0.97666667 0.98986486 0.97993311 0.98322148 0.97993311 0.97666667
|
|
0.98 0.97674419 0.97674419 0.98 ]
|
|
|
|
mean value: 0.9799774267537075
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02750015 0.01196432 0.01203775 0.01166177 0.0112555 0.01293898
|
|
0.01182771 0.01210523 0.01166701 0.01209378]
|
|
|
|
mean value: 0.013505220413208008
|
|
|
|
key: score_time
|
|
value: [0.00998569 0.00978303 0.00998259 0.00981021 0.0090344 0.00994062
|
|
0.00977063 0.00989413 0.00978208 0.00990629]
|
|
|
|
mean value: 0.009788966178894043
|
|
|
|
key: test_mcc
|
|
value: [0.42919754 0.53099079 0.58027158 0.47727273 0.72348485 0.54131274
|
|
0.63068182 0.63153153 0.64942422 0.47810304]
|
|
|
|
mean value: 0.5672270829566963
|
|
|
|
key: train_mcc
|
|
value: [0.58200069 0.60001567 0.58515758 0.56541828 0.58185441 0.60844172
|
|
0.56880028 0.60266953 0.58602719 0.60241221]
|
|
|
|
mean value: 0.5882797582957422
|
|
|
|
key: test_accuracy
|
|
value: [0.71212121 0.75757576 0.78461538 0.73846154 0.86153846 0.76923077
|
|
0.81538462 0.81538462 0.81538462 0.73846154]
|
|
|
|
mean value: 0.7808158508158508
|
|
|
|
key: train_accuracy
|
|
value: [0.79010239 0.79863481 0.79216354 0.78194208 0.79045997 0.80408859
|
|
0.78364566 0.80068143 0.79216354 0.80068143]
|
|
|
|
mean value: 0.7934563436458885
|
|
|
|
key: test_fscore
|
|
value: [0.68852459 0.78378378 0.76666667 0.73846154 0.86153846 0.76190476
|
|
0.8125 0.80645161 0.78571429 0.72131148]
|
|
|
|
mean value: 0.7726857176546494
|
|
|
|
key: train_fscore
|
|
value: [0.78152753 0.78853047 0.78596491 0.77304965 0.78383128 0.80069324
|
|
0.77601411 0.7943761 0.7844523 0.79509632]
|
|
|
|
mean value: 0.7863535905385026
|
|
|
|
key: test_precision
|
|
value: [0.75 0.70731707 0.85185185 0.75 0.875 0.8
|
|
0.8125 0.83333333 0.91666667 0.75862069]
|
|
|
|
mean value: 0.8055289614677756
|
|
|
|
key: train_precision
|
|
value: [0.81481481 0.83018868 0.80866426 0.80442804 0.80797101 0.81338028
|
|
0.80586081 0.82181818 0.81617647 0.81949458]
|
|
|
|
mean value: 0.8142797137556002
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.87878788 0.6969697 0.72727273 0.84848485 0.72727273
|
|
0.8125 0.78125 0.6875 0.6875 ]
|
|
|
|
mean value: 0.7483901515151515
|
|
|
|
key: train_recall
|
|
value: [0.75085324 0.75085324 0.76450512 0.7440273 0.76109215 0.7883959
|
|
0.74829932 0.76870748 0.75510204 0.77210884]
|
|
|
|
mean value: 0.7603944649532167
|
|
|
|
key: test_roc_auc
|
|
value: [0.71212121 0.75757576 0.78598485 0.73863636 0.86174242 0.76988636
|
|
0.81534091 0.81486742 0.81344697 0.73768939]
|
|
|
|
mean value: 0.7807291666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.79010239 0.79863481 0.79211651 0.7818776 0.79041002 0.8040619
|
|
0.78370597 0.80073599 0.79222679 0.80073019]
|
|
|
|
mean value: 0.7934602168512456
|
|
|
|
key: test_jcc
|
|
value: [0.525 0.64444444 0.62162162 0.58536585 0.75675676 0.61538462
|
|
0.68421053 0.67567568 0.64705882 0.56410256]
|
|
|
|
mean value: 0.6319620881489416
|
|
|
|
key: train_jcc
|
|
value: [0.64139942 0.65088757 0.64739884 0.6300578 0.64450867 0.66763006
|
|
0.63400576 0.65889213 0.64534884 0.65988372]
|
|
|
|
mean value: 0.6480012816704841
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.09386945 0.07413387 0.24434423 0.08141041 0.09000111 0.08292508
|
|
0.08711767 0.08184457 0.0863924 0.10627866]
|
|
|
|
mean value: 0.10283174514770507
|
|
|
|
key: score_time
|
|
value: [0.01123762 0.01102638 0.01161551 0.0113163 0.01165724 0.01128268
|
|
0.01213384 0.0116303 0.01125193 0.01118922]
|
|
|
|
mean value: 0.011434102058410644
|
|
|
|
key: test_mcc
|
|
value: [1. 0.88531564 0.96966868 0.91144345 0.96966868 0.96966868
|
|
1. 0.94028478 1. 1. ]
|
|
|
|
mean value: 0.9646049921753612
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.93939394 0.98461538 0.95384615 0.98461538 0.98461538
|
|
1. 0.96923077 1. 1. ]
|
|
|
|
mean value: 0.9816317016317017
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.94285714 0.98507463 0.95652174 0.98507463 0.98507463
|
|
1. 0.96969697 1. 1. ]
|
|
|
|
mean value: 0.9824299732281562
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.89189189 0.97058824 0.91666667 0.97058824 0.97058824
|
|
1. 0.94117647 1. 1. ]
|
|
|
|
mean value: 0.9661499735029146
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.93939394 0.984375 0.953125 0.984375 0.984375
|
|
1. 0.96969697 1. 1. ]
|
|
|
|
mean value: 0.9815340909090909
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.89189189 0.97058824 0.91666667 0.97058824 0.97058824
|
|
1. 0.94117647 1. 1. ]
|
|
|
|
mean value: 0.9661499735029146
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04903173 0.05711269 0.04475141 0.0757432 0.0602169 0.08182859
|
|
0.04725313 0.0788002 0.06227136 0.04979157]
|
|
|
|
mean value: 0.060680079460144046
|
|
|
|
key: score_time
|
|
value: [0.01930285 0.01229572 0.01416039 0.01229095 0.01947045 0.01236129
|
|
0.01226354 0.01940012 0.01230478 0.01904869]
|
|
|
|
mean value: 0.015289878845214844
|
|
|
|
key: test_mcc
|
|
value: [0.9701425 0.74420841 0.87689394 0.96969697 0.96966868 0.87844611
|
|
0.94028478 0.88382395 0.87867338 1. ]
|
|
|
|
mean value: 0.9111838725828962
|
|
|
|
key: train_mcc
|
|
value: [0.95230718 0.96246294 0.96257212 0.94550795 0.95232236 0.94903173
|
|
0.93869211 0.95571215 0.95238704 0.94894121]
|
|
|
|
mean value: 0.9519936782040691
|
|
|
|
key: test_accuracy
|
|
value: [0.98484848 0.86363636 0.93846154 0.98461538 0.98461538 0.93846154
|
|
0.96923077 0.93846154 0.93846154 1. ]
|
|
|
|
mean value: 0.9540792540792541
|
|
|
|
key: train_accuracy
|
|
value: [0.97610922 0.98122867 0.98126065 0.97274276 0.97614991 0.97444634
|
|
0.9693356 0.97785349 0.97614991 0.97444634]
|
|
|
|
mean value: 0.9759722892476932
|
|
|
|
key: test_fscore
|
|
value: [0.98461538 0.87671233 0.93939394 0.98461538 0.98507463 0.94117647
|
|
0.96969697 0.94117647 0.93939394 1. ]
|
|
|
|
mean value: 0.9561855514524884
|
|
|
|
key: train_fscore
|
|
value: [0.97627119 0.98126065 0.98132428 0.97278912 0.97619048 0.97461929
|
|
0.96949153 0.97792869 0.97635135 0.97461929]
|
|
|
|
mean value: 0.9760845852229671
|
|
|
|
key: test_precision
|
|
value: [1. 0.8 0.93939394 1. 0.97058824 0.91428571
|
|
0.94117647 0.88888889 0.91176471 1. ]
|
|
|
|
mean value: 0.9366097954333248
|
|
|
|
key: train_precision
|
|
value: [0.96969697 0.97959184 0.97635135 0.96949153 0.97288136 0.96644295
|
|
0.96621622 0.97627119 0.96979866 0.96969697]
|
|
|
|
mean value: 0.9716439022231066
|
|
|
|
key: test_recall
|
|
value: [0.96969697 0.96969697 0.93939394 0.96969697 1. 0.96969697
|
|
1. 1. 0.96875 1. ]
|
|
|
|
mean value: 0.9786931818181819
|
|
|
|
key: train_recall
|
|
value: [0.98293515 0.98293515 0.98634812 0.97610922 0.97952218 0.98293515
|
|
0.97278912 0.97959184 0.9829932 0.97959184]
|
|
|
|
mean value: 0.9805750969329712
|
|
|
|
key: test_roc_auc
|
|
value: [0.98484848 0.86363636 0.93844697 0.98484848 0.984375 0.93797348
|
|
0.96969697 0.93939394 0.93892045 1. ]
|
|
|
|
mean value: 0.9542140151515152
|
|
|
|
key: train_roc_auc
|
|
value: [0.97610922 0.98122867 0.9812693 0.97274849 0.97615565 0.97446077
|
|
0.96932971 0.97785053 0.97613824 0.97443756]
|
|
|
|
mean value: 0.9759728123331244
|
|
|
|
key: test_jcc
|
|
value: [0.96969697 0.7804878 0.88571429 0.96969697 0.97058824 0.88888889
|
|
0.94117647 0.88888889 0.88571429 1. ]
|
|
|
|
mean value: 0.918085279936069
|
|
|
|
key: train_jcc
|
|
value: [0.95364238 0.9632107 0.96333333 0.94701987 0.95348837 0.95049505
|
|
0.94078947 0.95681063 0.95379538 0.95049505]
|
|
|
|
mean value: 0.9533080242884424
|
|
|
|
MCC on Blind test: 0.82
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01191735 0.01109195 0.01145744 0.01019454 0.01044583 0.01129651
|
|
0.01036239 0.0100286 0.01021838 0.01007605]
|
|
|
|
mean value: 0.010708904266357422
|
|
|
|
key: score_time
|
|
value: [0.00950074 0.00956059 0.00898194 0.00879025 0.00971723 0.00899887
|
|
0.00876164 0.00882864 0.00875211 0.00889969]
|
|
|
|
mean value: 0.009079170227050782
|
|
|
|
key: test_mcc
|
|
value: [0.42443734 0.64715023 0.57061637 0.53882576 0.78763191 0.48131798
|
|
0.65199287 0.54591405 0.60000027 0.48131798]
|
|
|
|
mean value: 0.5729204754535123
|
|
|
|
key: train_mcc
|
|
value: [0.62197601 0.58428511 0.61082676 0.57563129 0.58929329 0.59184703
|
|
0.59480594 0.61245804 0.58185441 0.61325931]
|
|
|
|
mean value: 0.5976237181144685
|
|
|
|
key: test_accuracy
|
|
value: [0.71212121 0.81818182 0.78461538 0.76923077 0.89230769 0.73846154
|
|
0.81538462 0.76923077 0.8 0.73846154]
|
|
|
|
mean value: 0.7837995337995338
|
|
|
|
key: train_accuracy
|
|
value: [0.8105802 0.79180887 0.80408859 0.78705281 0.79386712 0.7955707
|
|
0.79727428 0.80579216 0.79045997 0.80579216]
|
|
|
|
mean value: 0.7982286863847526
|
|
|
|
key: test_fscore
|
|
value: [0.71641791 0.83333333 0.78125 0.76923077 0.89855072 0.76056338
|
|
0.83333333 0.7826087 0.79365079 0.71186441]
|
|
|
|
mean value: 0.7880803347347197
|
|
|
|
key: train_fscore
|
|
value: [0.81530782 0.79666667 0.81239804 0.79406919 0.80065898 0.8
|
|
0.80067002 0.81125828 0.79669421 0.81311475]
|
|
|
|
mean value: 0.8040837964585462
|
|
|
|
key: test_precision
|
|
value: [0.70588235 0.76923077 0.80645161 0.78125 0.86111111 0.71052632
|
|
0.75 0.72972973 0.80645161 0.77777778]
|
|
|
|
mean value: 0.7698411282386489
|
|
|
|
key: train_precision
|
|
value: [0.79545455 0.77850163 0.778125 0.76751592 0.77388535 0.78175896
|
|
0.78877888 0.79032258 0.77491961 0.78481013]
|
|
|
|
mean value: 0.7814072604922253
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.90909091 0.75757576 0.75757576 0.93939394 0.81818182
|
|
0.9375 0.84375 0.78125 0.65625 ]
|
|
|
|
mean value: 0.8127840909090909
|
|
|
|
key: train_recall
|
|
value: [0.83617747 0.81569966 0.84982935 0.8225256 0.82935154 0.81911263
|
|
0.81292517 0.83333333 0.81972789 0.84353741]
|
|
|
|
mean value: 0.8282220055257599
|
|
|
|
key: test_roc_auc
|
|
value: [0.71212121 0.81818182 0.78503788 0.76941288 0.89157197 0.73721591
|
|
0.81723485 0.77035985 0.79971591 0.73721591]
|
|
|
|
mean value: 0.7838068181818182
|
|
|
|
key: train_roc_auc
|
|
value: [0.8105802 0.79180887 0.80416638 0.78711314 0.79392747 0.79561074
|
|
0.79724757 0.80574516 0.79041002 0.80572775]
|
|
|
|
mean value: 0.7982337303522091
|
|
|
|
key: test_jcc
|
|
value: [0.55813953 0.71428571 0.64102564 0.625 0.81578947 0.61363636
|
|
0.71428571 0.64285714 0.65789474 0.55263158]
|
|
|
|
mean value: 0.6535545900447981
|
|
|
|
key: train_jcc
|
|
value: [0.68820225 0.66204986 0.68406593 0.65846995 0.66758242 0.66666667
|
|
0.66759777 0.68245125 0.66208791 0.68508287]
|
|
|
|
mean value: 0.6724256876218178
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02177429 0.02907991 0.03356957 0.03469872 0.02976513 0.02188373
|
|
0.03032804 0.02910614 0.02598953 0.03564668]
|
|
|
|
mean value: 0.029184174537658692
|
|
|
|
key: score_time
|
|
value: [0.01015782 0.01128125 0.01198101 0.01198912 0.01193428 0.01202154
|
|
0.01189089 0.01198149 0.01193619 0.01201606]
|
|
|
|
mean value: 0.011718964576721192
|
|
|
|
key: test_mcc
|
|
value: [0.90950859 0.85201287 0.90805728 0.87844611 0.91144345 0.74121539
|
|
0.94017476 0.84953768 0.91144345 0.96969697]
|
|
|
|
mean value: 0.8871536549688918
|
|
|
|
key: train_mcc
|
|
value: [0.96928892 0.96933409 0.96938669 0.96592835 0.9326412 0.85652373
|
|
0.94234858 0.92610248 0.92910679 0.95571215]
|
|
|
|
mean value: 0.9416372979073604
|
|
|
|
key: test_accuracy
|
|
value: [0.95454545 0.92424242 0.95384615 0.93846154 0.95384615 0.86153846
|
|
0.96923077 0.92307692 0.95384615 0.98461538]
|
|
|
|
mean value: 0.9417249417249418
|
|
|
|
key: train_accuracy
|
|
value: [0.98464164 0.98464164 0.9846678 0.98296422 0.96592845 0.92504259
|
|
0.97103918 0.96252129 0.96422487 0.97785349]
|
|
|
|
mean value: 0.9703525184457327
|
|
|
|
key: test_fscore
|
|
value: [0.95522388 0.92753623 0.95522388 0.94117647 0.95652174 0.84745763
|
|
0.96774194 0.91803279 0.95081967 0.98461538]
|
|
|
|
mean value: 0.9404349609031051
|
|
|
|
key: train_fscore
|
|
value: [0.98461538 0.98471986 0.98471986 0.98293515 0.96655518 0.92
|
|
0.9707401 0.96167247 0.96360485 0.97792869]
|
|
|
|
mean value: 0.969749157302225
|
|
|
|
key: test_precision
|
|
value: [0.94117647 0.88888889 0.94117647 0.91428571 0.91666667 0.96153846
|
|
1. 0.96551724 1. 0.96969697]
|
|
|
|
mean value: 0.9498946883632482
|
|
|
|
key: train_precision
|
|
value: [0.98630137 0.97972973 0.97972973 0.98293515 0.94754098 0.9844358
|
|
0.9825784 0.98571429 0.98233216 0.97627119]
|
|
|
|
mean value: 0.9787568789022557
|
|
|
|
key: test_recall
|
|
value: [0.96969697 0.96969697 0.96969697 0.96969697 1. 0.75757576
|
|
0.9375 0.875 0.90625 1. ]
|
|
|
|
mean value: 0.9355113636363637
|
|
|
|
key: train_recall
|
|
value: [0.98293515 0.98976109 0.98976109 0.98293515 0.98634812 0.86348123
|
|
0.95918367 0.93877551 0.94557823 0.97959184]
|
|
|
|
mean value: 0.9618351094704093
|
|
|
|
key: test_roc_auc
|
|
value: [0.95454545 0.92424242 0.95359848 0.93797348 0.953125 0.86316288
|
|
0.96875 0.92234848 0.953125 0.98484848]
|
|
|
|
mean value: 0.9415719696969698
|
|
|
|
key: train_roc_auc
|
|
value: [0.98464164 0.98464164 0.98467646 0.98296418 0.96596318 0.92493789
|
|
0.97105941 0.96256182 0.96425669 0.97785053]
|
|
|
|
mean value: 0.9703553435025888
|
|
|
|
key: test_jcc
|
|
value: [0.91428571 0.86486486 0.91428571 0.88888889 0.91666667 0.73529412
|
|
0.9375 0.84848485 0.90625 0.96969697]
|
|
|
|
mean value: 0.8896217784820726
|
|
|
|
key: train_jcc
|
|
value: [0.96969697 0.96989967 0.96989967 0.96644295 0.93527508 0.85185185
|
|
0.94314381 0.9261745 0.92976589 0.95681063]
|
|
|
|
mean value: 0.9418961013448971
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01870584 0.02071786 0.02087641 0.02029848 0.01994467 0.02535534
|
|
0.02135015 0.02066016 0.02104807 0.02397323]
|
|
|
|
mean value: 0.021293020248413085
|
|
|
|
key: score_time
|
|
value: [0.01050353 0.01194453 0.01200318 0.01192403 0.01192117 0.0119741
|
|
0.01186943 0.01193142 0.01191711 0.01191044]
|
|
|
|
mean value: 0.011789894104003907
|
|
|
|
key: test_mcc
|
|
value: [0.90950859 0.78824078 0.84953768 0.8291562 0.91144345 0.63932742
|
|
0.94028478 0.85663571 0.87844611 1. ]
|
|
|
|
mean value: 0.8602580723623745
|
|
|
|
key: train_mcc
|
|
value: [0.96928892 0.96589281 0.8665154 0.82731229 0.9293669 0.59520993
|
|
0.9524971 0.93921285 0.95238925 0.95911402]
|
|
|
|
mean value: 0.8956799458569592
|
|
|
|
key: test_accuracy
|
|
value: [0.95454545 0.89393939 0.92307692 0.90769231 0.95384615 0.8
|
|
0.96923077 0.92307692 0.93846154 1. ]
|
|
|
|
mean value: 0.9263869463869464
|
|
|
|
key: train_accuracy
|
|
value: [0.98464164 0.98293515 0.93015332 0.90630324 0.96422487 0.7649063
|
|
0.97614991 0.9693356 0.97614991 0.97955707]
|
|
|
|
mean value: 0.9434357030309726
|
|
|
|
key: test_fscore
|
|
value: [0.95522388 0.89552239 0.92753623 0.91666667 0.95652174 0.76363636
|
|
0.96969697 0.92753623 0.93548387 1. ]
|
|
|
|
mean value: 0.9247824342523009
|
|
|
|
key: train_fscore
|
|
value: [0.98461538 0.98287671 0.93397746 0.91419657 0.96494157 0.69469027
|
|
0.97643098 0.96989967 0.9760274 0.97959184]
|
|
|
|
mean value: 0.9377247831270099
|
|
|
|
key: test_precision
|
|
value: [0.94117647 0.88235294 0.88888889 0.84615385 0.91666667 0.95454545
|
|
0.94117647 0.86486486 0.96666667 1. ]
|
|
|
|
mean value: 0.9202492270139329
|
|
|
|
key: train_precision
|
|
value: [0.98630137 0.9862543 0.88414634 0.84195402 0.94444444 0.98742138
|
|
0.96666667 0.95394737 0.98275862 0.97959184]
|
|
|
|
mean value: 0.9513486350451892
|
|
|
|
key: test_recall
|
|
value: [0.96969697 0.90909091 0.96969697 1. 1. 0.63636364
|
|
1. 1. 0.90625 1. ]
|
|
|
|
mean value: 0.9391098484848485
|
|
|
|
key: train_recall
|
|
value: [0.98293515 0.97952218 0.98976109 1. 0.98634812 0.53583618
|
|
0.98639456 0.98639456 0.96938776 0.97959184]
|
|
|
|
mean value: 0.9396171437858419
|
|
|
|
key: test_roc_auc
|
|
value: [0.95454545 0.89393939 0.92234848 0.90625 0.953125 0.80255682
|
|
0.96969697 0.92424242 0.93797348 1. ]
|
|
|
|
mean value: 0.9264678030303031
|
|
|
|
key: train_roc_auc
|
|
value: [0.98464164 0.98293515 0.9302547 0.90646259 0.9642625 0.76451673
|
|
0.97613243 0.96930649 0.97616145 0.97955701]
|
|
|
|
mean value: 0.9434230688862576
|
|
|
|
key: test_jcc
|
|
value: [0.91428571 0.81081081 0.86486486 0.84615385 0.91666667 0.61764706
|
|
0.94117647 0.86486486 0.87878788 1. ]
|
|
|
|
mean value: 0.8655258175846411
|
|
|
|
key: train_jcc
|
|
value: [0.96969697 0.96632997 0.87613293 0.84195402 0.93225806 0.53220339
|
|
0.95394737 0.94155844 0.95317726 0.96 ]
|
|
|
|
mean value: 0.8927258411380252
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.19168162 0.18652463 0.18877339 0.18711591 0.18756223 0.18694925
|
|
0.18872786 0.18731856 0.1890502 0.18967414]
|
|
|
|
mean value: 0.18833777904510499
|
|
|
|
key: score_time
|
|
value: [0.01539207 0.01641464 0.01546192 0.01561141 0.01558447 0.01564121
|
|
0.01585484 0.0155468 0.01538491 0.01601601]
|
|
|
|
mean value: 0.015690827369689943
|
|
|
|
key: test_mcc
|
|
value: [1. 0.88531564 0.96966868 0.94017476 0.96966868 0.96966868
|
|
1. 0.96969697 1. 0.96969697]
|
|
|
|
mean value: 0.9673890382089856
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.93939394 0.98461538 0.96923077 0.98461538 0.98461538
|
|
1. 0.98461538 1. 0.98461538]
|
|
|
|
mean value: 0.9831701631701633
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.94285714 0.98507463 0.97058824 0.98507463 0.98507463
|
|
1. 0.98461538 1. 0.98461538]
|
|
|
|
mean value: 0.9837900027979045
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.89189189 0.97058824 0.94285714 0.97058824 0.97058824
|
|
1. 0.96969697 1. 0.96969697]
|
|
|
|
mean value: 0.9685907680025327
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.93939394 0.984375 0.96875 0.984375 0.984375
|
|
1. 0.98484848 1. 0.98484848]
|
|
|
|
mean value: 0.983096590909091
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.89189189 0.97058824 0.94285714 0.97058824 0.97058824
|
|
1. 0.96969697 1. 0.96969697]
|
|
|
|
mean value: 0.9685907680025327
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.82
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07231236 0.07584405 0.07159591 0.09159732 0.08213663 0.09455013
|
|
0.08881402 0.07496858 0.08687353 0.09348845]
|
|
|
|
mean value: 0.08321809768676758
|
|
|
|
key: score_time
|
|
value: [0.02371931 0.03964543 0.03809118 0.02287412 0.0395174 0.03245187
|
|
0.02435994 0.03845763 0.03063273 0.03672957]
|
|
|
|
mean value: 0.03264791965484619
|
|
|
|
key: test_mcc
|
|
value: [1. 0.88531564 0.96966868 0.94017476 0.96966868 0.94017476
|
|
1. 0.94028478 1. 0.96969697]
|
|
|
|
mean value: 0.9614984268728954
|
|
|
|
key: train_mcc
|
|
value: [1. 0.9931972 0.98983039 0.99659864 1. 0.99659864
|
|
0.9965986 0.9965986 0.98983004 0.99320865]
|
|
|
|
mean value: 0.9952460760299192
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.93939394 0.98461538 0.96923077 0.98461538 0.96923077
|
|
1. 0.96923077 1. 0.98461538]
|
|
|
|
mean value: 0.9800932400932401
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.99658703 0.99488927 0.99829642 1. 0.99829642
|
|
0.99829642 0.99829642 0.99488927 0.99659284]
|
|
|
|
mean value: 0.9976144100563401
|
|
|
|
key: test_fscore
|
|
value: [1. 0.94285714 0.98507463 0.97058824 0.98507463 0.97058824
|
|
1. 0.96969697 1. 0.98461538]
|
|
|
|
mean value: 0.9808495221489075
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99659864 0.99490662 0.99829642 1. 0.99829642
|
|
0.99830221 0.99830221 0.99492386 0.99661017]
|
|
|
|
mean value: 0.9976236547443424
|
|
|
|
key: test_precision
|
|
value: [1. 0.89189189 0.97058824 0.94285714 0.97058824 0.94285714
|
|
1. 0.94117647 1. 0.96969697]
|
|
|
|
mean value: 0.9629656088479618
|
|
|
|
key: train_precision
|
|
value: [1. 0.99322034 0.98986486 0.99659864 1. 0.99659864
|
|
0.99661017 0.99661017 0.98989899 0.99324324]
|
|
|
|
mean value: 0.9952645054884764
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.93939394 0.984375 0.96875 0.984375 0.96875
|
|
1. 0.96969697 1. 0.98484848]
|
|
|
|
mean value: 0.9800189393939394
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99658703 0.99489796 0.99829932 1. 0.99829932
|
|
0.99829352 0.99829352 0.99488055 0.99658703]
|
|
|
|
mean value: 0.9976138236864711
|
|
|
|
key: test_jcc
|
|
value: [1. 0.89189189 0.97058824 0.94285714 0.97058824 0.94285714
|
|
1. 0.94117647 1. 0.96969697]
|
|
|
|
mean value: 0.9629656088479618
|
|
|
|
key: train_jcc
|
|
value: [1. 0.99322034 0.98986486 0.99659864 1. 0.99659864
|
|
0.99661017 0.99661017 0.98989899 0.99324324]
|
|
|
|
mean value: 0.9952645054884764
|
|
|
|
MCC on Blind test: 0.82
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.22352338 0.25992131 0.29241633 0.16742349 0.21570444 0.24491692
|
|
0.23902869 0.24685073 0.24089813 0.31559348]
|
|
|
|
mean value: 0.2446276903152466
|
|
|
|
key: score_time
|
|
value: [0.0278883 0.02800488 0.02785397 0.01648283 0.02850795 0.02785277
|
|
0.02769923 0.02845788 0.027807 0.02868414]
|
|
|
|
mean value: 0.02692389488220215
|
|
|
|
key: test_mcc
|
|
value: [0.88531564 0.88531564 0.80282704 0.91144345 0.77695466 0.80282704
|
|
0.80403025 0.91168461 0.91168461 0.81706198]
|
|
|
|
mean value: 0.8509144915529278
|
|
|
|
key: train_mcc
|
|
value: [0.98981298 0.98981298 0.98646327 0.98646327 0.99320881 0.98983039
|
|
0.98310636 0.98646265 0.98983004 0.97957952]
|
|
|
|
mean value: 0.9874570253062251
|
|
|
|
key: test_accuracy
|
|
value: [0.93939394 0.93939394 0.89230769 0.95384615 0.87692308 0.89230769
|
|
0.89230769 0.95384615 0.95384615 0.90769231]
|
|
|
|
mean value: 0.9201864801864802
|
|
|
|
key: train_accuracy
|
|
value: [0.99488055 0.99488055 0.99318569 0.99318569 0.99659284 0.99488927
|
|
0.99148211 0.99318569 0.99488927 0.98977853]
|
|
|
|
mean value: 0.9936950189254089
|
|
|
|
key: test_fscore
|
|
value: [0.94285714 0.94285714 0.90410959 0.95652174 0.89189189 0.90410959
|
|
0.90140845 0.95522388 0.95522388 0.90909091]
|
|
|
|
mean value: 0.9263294215807969
|
|
|
|
key: train_fscore
|
|
value: [0.99490662 0.99490662 0.99322034 0.99322034 0.99659864 0.99490662
|
|
0.9915683 0.99324324 0.99492386 0.98983051]
|
|
|
|
mean value: 0.9937325087980247
|
|
|
|
key: test_precision
|
|
value: [0.89189189 0.89189189 0.825 0.91666667 0.80487805 0.825
|
|
0.82051282 0.91428571 0.91428571 0.88235294]
|
|
|
|
mean value: 0.8686765689491658
|
|
|
|
key: train_precision
|
|
value: [0.98986486 0.98986486 0.98653199 0.98653199 0.99322034 0.98986486
|
|
0.98327759 0.98657718 0.98989899 0.98648649]
|
|
|
|
mean value: 0.9882119156208393
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 0.9375]
|
|
|
|
mean value: 0.99375
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99319728]
|
|
|
|
mean value: 0.9993197278911564
|
|
|
|
key: test_roc_auc
|
|
value: [0.93939394 0.93939394 0.890625 0.953125 0.875 0.890625
|
|
0.89393939 0.95454545 0.95454545 0.90814394]
|
|
|
|
mean value: 0.9199337121212121
|
|
|
|
key: train_roc_auc
|
|
value: [0.99488055 0.99488055 0.99319728 0.99319728 0.99659864 0.99489796
|
|
0.99146758 0.99317406 0.99488055 0.9897727 ]
|
|
|
|
mean value: 0.9936947133802326
|
|
|
|
key: test_jcc
|
|
value: [0.89189189 0.89189189 0.825 0.91666667 0.80487805 0.825
|
|
0.82051282 0.91428571 0.91428571 0.83333333]
|
|
|
|
mean value: 0.8637746081648521
|
|
|
|
key: train_jcc
|
|
value: [0.98986486 0.98986486 0.98653199 0.98653199 0.99322034 0.98986486
|
|
0.98327759 0.98657718 0.98989899 0.97986577]
|
|
|
|
mean value: 0.9875498441533987
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.74850655 0.7487638 0.75244284 0.75276852 0.75158358 0.74764895
|
|
0.75177717 0.75058126 0.74825907 0.75402451]
|
|
|
|
mean value: 0.7506356239318848
|
|
|
|
key: score_time
|
|
value: [0.00954652 0.01034856 0.00933361 0.00950527 0.01000786 0.00935388
|
|
0.00940204 0.0093143 0.0098443 0.00949383]
|
|
|
|
mean value: 0.009615015983581544
|
|
|
|
key: test_mcc
|
|
value: [1. 0.88531564 0.96966868 0.94017476 0.96966868 0.91144345
|
|
1. 0.94028478 1. 0.96969697]
|
|
|
|
mean value: 0.9586252965196415
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.93939394 0.98461538 0.96923077 0.98461538 0.95384615
|
|
1. 0.96923077 1. 0.98461538]
|
|
|
|
mean value: 0.9785547785547786
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.94285714 0.98507463 0.97058824 0.98507463 0.95652174
|
|
1. 0.96969697 1. 0.98461538]
|
|
|
|
mean value: 0.9794428725325393
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.89189189 0.97058824 0.94285714 0.97058824 0.91666667
|
|
1. 0.94117647 1. 0.96969697]
|
|
|
|
mean value: 0.9603465612289142
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.93939394 0.984375 0.96875 0.984375 0.953125
|
|
1. 0.96969697 1. 0.98484848]
|
|
|
|
mean value: 0.9784564393939394
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.89189189 0.97058824 0.94285714 0.97058824 0.91666667
|
|
1. 0.94117647 1. 0.96969697]
|
|
|
|
mean value: 0.9603465612289142
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03289318 0.03336477 0.03610873 0.03162313 0.0395174 0.03225017
|
|
0.03240514 0.03235316 0.03207421 0.03266597]
|
|
|
|
mean value: 0.03352558612823486
|
|
|
|
key: score_time
|
|
value: [0.01233625 0.01858902 0.01290846 0.01493025 0.01855946 0.02041245
|
|
0.01499772 0.01503062 0.01514864 0.0151844 ]
|
|
|
|
mean value: 0.01580972671508789
|
|
|
|
key: test_mcc
|
|
value: [0.94112395 1. 0.96966868 0.94028478 0.96969697 1.
|
|
0.96969697 0.90805728 0.87844611 1. ]
|
|
|
|
mean value: 0.9576974742315519
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 1. 0.98461538 0.96923077 0.98461538 1.
|
|
0.98461538 0.95384615 0.93846154 1. ]
|
|
|
|
mean value: 0.9785081585081585
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97058824 1. 0.98507463 0.96875 0.98461538 1.
|
|
0.98461538 0.95238095 0.93548387 1. ]
|
|
|
|
mean value: 0.9781508454739253
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94285714 1. 0.97058824 1. 1. 1.
|
|
0.96969697 0.96774194 0.96666667 1. ]
|
|
|
|
mean value: 0.9817550949998768
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.93939394 0.96969697 1.
|
|
1. 0.9375 0.90625 1. ]
|
|
|
|
mean value: 0.975284090909091
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96969697 1. 0.984375 0.96969697 0.98484848 1.
|
|
0.98484848 0.95359848 0.93797348 1. ]
|
|
|
|
mean value: 0.9785037878787879
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94285714 1. 0.97058824 0.93939394 0.96969697 1.
|
|
0.96969697 0.90909091 0.87878788 1. ]
|
|
|
|
mean value: 0.9580112044817928
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01766801 0.01648593 0.03942156 0.03964114 0.0395329 0.03955841
|
|
0.03969049 0.03983688 0.04054785 0.0395534 ]
|
|
|
|
mean value: 0.035193657875061034
|
|
|
|
key: score_time
|
|
value: [0.02938604 0.01217794 0.01870894 0.0188036 0.01890564 0.0188725
|
|
0.01900768 0.01918125 0.01913404 0.01896834]
|
|
|
|
mean value: 0.01931459903717041
|
|
|
|
key: test_mcc
|
|
value: [0.93939394 0.79708114 0.84644588 0.93844697 0.94017476 0.93844697
|
|
0.94028478 0.91168461 0.93844697 1. ]
|
|
|
|
mean value: 0.9190406017750745
|
|
|
|
key: train_mcc
|
|
value: [0.94541452 0.95563697 0.95920405 0.94550795 0.942084 0.94212842
|
|
0.94208333 0.94889833 0.94550668 0.93869211]
|
|
|
|
mean value: 0.946515635707813
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.89393939 0.92307692 0.96923077 0.96923077 0.96923077
|
|
0.96923077 0.95384615 0.96923077 1. ]
|
|
|
|
mean value: 0.9586713286713286
|
|
|
|
key: train_accuracy
|
|
value: [0.97269625 0.9778157 0.97955707 0.97274276 0.97103918 0.97103918
|
|
0.97103918 0.97444634 0.97274276 0.9693356 ]
|
|
|
|
mean value: 0.9732454023757057
|
|
|
|
key: test_fscore
|
|
value: [0.96969697 0.90140845 0.92537313 0.96969697 0.97058824 0.96969697
|
|
0.96969697 0.95522388 0.96875 1. ]
|
|
|
|
mean value: 0.9600131579711595
|
|
|
|
key: train_fscore
|
|
value: [0.97278912 0.97777778 0.97966102 0.97278912 0.97103918 0.97113752
|
|
0.97113752 0.97444634 0.97288136 0.96949153]
|
|
|
|
mean value: 0.9733150469411342
|
|
|
|
key: test_precision
|
|
value: [0.96969697 0.84210526 0.91176471 0.96969697 0.94285714 0.96969697
|
|
0.94117647 0.91428571 0.96875 1. ]
|
|
|
|
mean value: 0.9430030205862249
|
|
|
|
key: train_precision
|
|
value: [0.96949153 0.97945205 0.97306397 0.96949153 0.96938776 0.96621622
|
|
0.96949153 0.97610922 0.96959459 0.96621622]
|
|
|
|
mean value: 0.9708514601275813
|
|
|
|
key: test_recall
|
|
value: [0.96969697 0.96969697 0.93939394 0.96969697 1. 0.96969697
|
|
1. 1. 0.96875 1. ]
|
|
|
|
mean value: 0.9786931818181819
|
|
|
|
key: train_recall
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./embb_sl.py:148: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_sl.py:151: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.97610922 0.97610922 0.98634812 0.97610922 0.97269625 0.97610922
|
|
0.97278912 0.97278912 0.97619048 0.97278912]
|
|
|
|
mean value: 0.9758039051798193
|
|
|
|
key: test_roc_auc
|
|
value: [0.96969697 0.89393939 0.92282197 0.96922348 0.96875 0.96922348
|
|
0.96969697 0.95454545 0.96922348 1. ]
|
|
|
|
mean value: 0.9587121212121212
|
|
|
|
key: train_roc_auc
|
|
value: [0.97269625 0.9778157 0.97956862 0.97274849 0.971042 0.9710478
|
|
0.9710362 0.97444917 0.97273688 0.96932971]
|
|
|
|
mean value: 0.9732470804021267
|
|
|
|
key: test_jcc
|
|
value: [0.94117647 0.82051282 0.86111111 0.94117647 0.94285714 0.94117647
|
|
0.94117647 0.91428571 0.93939394 1. ]
|
|
|
|
mean value: 0.9242866610513669
|
|
|
|
key: train_jcc
|
|
value: [0.94701987 0.95652174 0.96013289 0.94701987 0.94370861 0.94389439
|
|
0.94389439 0.95016611 0.94719472 0.94078947]
|
|
|
|
mean value: 0.94803420588576
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.27860427 0.30625582 0.2999301 0.29773641 0.29638553 0.30672407
|
|
0.34239507 0.32195616 0.29284167 0.299088 ]
|
|
|
|
mean value: 0.3041917085647583
|
|
|
|
key: score_time
|
|
value: [0.01983547 0.01884723 0.01885319 0.0189271 0.01889658 0.01881385
|
|
0.02004528 0.01894379 0.01895666 0.0188508 ]
|
|
|
|
mean value: 0.019096994400024415
|
|
|
|
key: test_mcc
|
|
value: [0.93939394 0.79708114 0.84644588 0.93844697 0.94017476 0.90805728
|
|
0.94028478 0.91168461 0.93844697 1. ]
|
|
|
|
mean value: 0.916001632801491
|
|
|
|
key: train_mcc
|
|
value: [0.94541452 0.95563697 0.95920405 0.94550795 0.942084 0.94212842
|
|
0.94208333 0.94889833 0.94550668 0.93869211]
|
|
|
|
mean value: 0.946515635707813
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.89393939 0.92307692 0.96923077 0.96923077 0.95384615
|
|
0.96923077 0.95384615 0.96923077 1. ]
|
|
|
|
mean value: 0.9571328671328672
|
|
|
|
key: train_accuracy
|
|
value: [0.97269625 0.9778157 0.97955707 0.97274276 0.97103918 0.97103918
|
|
0.97103918 0.97444634 0.97274276 0.9693356 ]
|
|
|
|
mean value: 0.9732454023757057
|
|
|
|
key: test_fscore
|
|
value: [0.96969697 0.90140845 0.92537313 0.96969697 0.97058824 0.95522388
|
|
0.96969697 0.95522388 0.96875 1. ]
|
|
|
|
mean value: 0.958565849061164
|
|
|
|
key: train_fscore
|
|
value: [0.97278912 0.97777778 0.97966102 0.97278912 0.97103918 0.97113752
|
|
0.97113752 0.97444634 0.97288136 0.96949153]
|
|
|
|
mean value: 0.9733150469411342
|
|
|
|
key: test_precision
|
|
value: [0.96969697 0.84210526 0.91176471 0.96969697 0.94285714 0.94117647
|
|
0.94117647 0.91428571 0.96875 1. ]
|
|
|
|
mean value: 0.9401509706753515
|
|
|
|
key: train_precision
|
|
value: [0.96949153 0.97945205 0.97306397 0.96949153 0.96938776 0.96621622
|
|
0.96949153 0.97610922 0.96959459 0.96621622]
|
|
|
|
mean value: 0.9708514601275813
|
|
|
|
key: test_recall
|
|
value: [0.96969697 0.96969697 0.93939394 0.96969697 1. 0.96969697
|
|
1. 1. 0.96875 1. ]
|
|
|
|
mean value: 0.9786931818181819
|
|
|
|
key: train_recall
|
|
value: [0.97610922 0.97610922 0.98634812 0.97610922 0.97269625 0.97610922
|
|
0.97278912 0.97278912 0.97619048 0.97278912]
|
|
|
|
mean value: 0.9758039051798193
|
|
|
|
key: test_roc_auc
|
|
value: [0.96969697 0.89393939 0.92282197 0.96922348 0.96875 0.95359848
|
|
0.96969697 0.95454545 0.96922348 1. ]
|
|
|
|
mean value: 0.9571496212121212
|
|
|
|
key: train_roc_auc
|
|
value: [0.97269625 0.9778157 0.97956862 0.97274849 0.971042 0.9710478
|
|
0.9710362 0.97444917 0.97273688 0.96932971]
|
|
|
|
mean value: 0.9732470804021267
|
|
|
|
key: test_jcc
|
|
value: [0.94117647 0.82051282 0.86111111 0.94117647 0.94285714 0.91428571
|
|
0.94117647 0.91428571 0.93939394 1. ]
|
|
|
|
mean value: 0.9215975854211148
|
|
|
|
key: train_jcc
|
|
value: [0.94701987 0.95652174 0.96013289 0.94701987 0.94370861 0.94389439
|
|
0.94389439 0.95016611 0.94719472 0.94078947]
|
|
|
|
mean value: 0.94803420588576
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0257926 0.02790737 0.02318335 0.03167439 0.05734181 0.04880977
|
|
0.03250027 0.03194118 0.03827858 0.0335741 ]
|
|
|
|
mean value: 0.035100340843200684
|
|
|
|
key: score_time
|
|
value: [0.01167178 0.01159739 0.01160884 0.01431298 0.01217198 0.01262093
|
|
0.01354647 0.01503205 0.01518011 0.01170874]
|
|
|
|
mean value: 0.012945127487182618
|
|
|
|
key: test_mcc
|
|
value: [0.56980288 0.67082039 0.77777778 0.4472136 0.3721042 0.89442719
|
|
0.65277778 0.41666667 0.6479516 0.41666667]
|
|
|
|
mean value: 0.5866208749996378
|
|
|
|
key: train_mcc
|
|
value: [0.88614695 0.83550998 0.84837318 0.86082846 0.88614695 0.86138081
|
|
0.88859066 0.86227649 0.84929565 0.8868492 ]
|
|
|
|
mean value: 0.8665398312973626
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.83333333 0.88888889 0.72222222 0.66666667 0.94444444
|
|
0.82352941 0.70588235 0.82352941 0.70588235]
|
|
|
|
mean value: 0.7892156862745098
|
|
|
|
key: train_accuracy
|
|
value: [0.94303797 0.91772152 0.92405063 0.93037975 0.94303797 0.93037975
|
|
0.94339623 0.93081761 0.9245283 0.94339623]
|
|
|
|
mean value: 0.9330745959716583
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.84210526 0.88888889 0.73684211 0.57142857 0.94117647
|
|
0.82352941 0.70588235 0.84210526 0.70588235]
|
|
|
|
mean value: 0.7857840680131701
|
|
|
|
key: train_fscore
|
|
value: [0.94267516 0.91719745 0.92307692 0.93081761 0.94267516 0.92903226
|
|
0.94193548 0.92993631 0.92307692 0.94267516]
|
|
|
|
mean value: 0.9323098433821013
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.8 0.88888889 0.7 0.8 1.
|
|
0.77777778 0.66666667 0.8 0.75 ]
|
|
|
|
mean value: 0.791060606060606
|
|
|
|
key: train_precision
|
|
value: [0.94871795 0.92307692 0.93506494 0.925 0.94871795 0.94736842
|
|
0.97333333 0.94805195 0.93506494 0.94871795]
|
|
|
|
mean value: 0.9433114341798552
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.88888889 0.88888889 0.77777778 0.44444444 0.88888889
|
|
0.875 0.75 0.88888889 0.66666667]
|
|
|
|
mean value: 0.7958333333333333
|
|
|
|
key: train_recall
|
|
value: [0.93670886 0.91139241 0.91139241 0.93670886 0.93670886 0.91139241
|
|
0.9125 0.9125 0.91139241 0.93670886]
|
|
|
|
mean value: 0.9217405063291139
|
|
|
|
key: test_roc_auc
|
|
value: [0.77777778 0.83333333 0.88888889 0.72222222 0.66666667 0.94444444
|
|
0.82638889 0.70833333 0.81944444 0.70833333]
|
|
|
|
mean value: 0.7895833333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.94303797 0.91772152 0.92405063 0.93037975 0.94303797 0.93037975
|
|
0.94359177 0.93093354 0.9244462 0.94335443]
|
|
|
|
mean value: 0.9330933544303798
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.72727273 0.8 0.58333333 0.4 0.88888889
|
|
0.7 0.54545455 0.72727273 0.54545455]
|
|
|
|
mean value: 0.6584343434343434
|
|
|
|
key: train_jcc
|
|
value: [0.89156627 0.84705882 0.85714286 0.87058824 0.89156627 0.86746988
|
|
0.8902439 0.86904762 0.85714286 0.89156627]
|
|
|
|
mean value: 0.8733392969294682
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.79543591 0.73526001 0.70240498 0.83575439 0.69058466 0.73388934
|
|
0.82162809 0.70372939 0.68670249 0.91151547]
|
|
|
|
mean value: 0.7616904735565185
|
|
|
|
key: score_time
|
|
value: [0.01407862 0.01623702 0.01558208 0.01444364 0.01650882 0.01581383
|
|
0.01443124 0.01586819 0.01206326 0.01457977]
|
|
|
|
mean value: 0.014960646629333496
|
|
|
|
key: test_mcc
|
|
value: [0.67082039 0.77777778 0.67082039 0.4472136 0.3721042 0.67082039
|
|
0.65277778 0.54935027 0.52777778 0.65277778]
|
|
|
|
mean value: 0.5992240355702041
|
|
|
|
key: train_mcc
|
|
value: [0.98742088 0.9243469 0.91146543 0.92405063 0.9621024 0.97468354
|
|
0.93718354 0.96234177 0.77385663 0.96233582]
|
|
|
|
mean value: 0.9319787560885171
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.88888889 0.83333333 0.72222222 0.66666667 0.83333333
|
|
0.82352941 0.76470588 0.76470588 0.82352941]
|
|
|
|
mean value: 0.7954248366013071
|
|
|
|
key: train_accuracy
|
|
value: [0.99367089 0.96202532 0.9556962 0.96202532 0.98101266 0.98734177
|
|
0.96855346 0.98113208 0.88679245 0.98113208]
|
|
|
|
mean value: 0.9659382214791816
|
|
|
|
key: test_fscore
|
|
value: [0.84210526 0.88888889 0.84210526 0.73684211 0.57142857 0.84210526
|
|
0.82352941 0.77777778 0.77777778 0.82352941]
|
|
|
|
mean value: 0.792608973413927
|
|
|
|
key: train_fscore
|
|
value: [0.99363057 0.96153846 0.95541401 0.96202532 0.98113208 0.98734177
|
|
0.96855346 0.98113208 0.8875 0.98089172]
|
|
|
|
mean value: 0.9659159465941434
|
|
|
|
key: test_precision
|
|
value: [0.8 0.88888889 0.8 0.7 0.8 0.8
|
|
0.77777778 0.7 0.77777778 0.875 ]
|
|
|
|
mean value: 0.7919444444444445
|
|
|
|
key: train_precision
|
|
value: [1. 0.97402597 0.96153846 0.96202532 0.975 0.98734177
|
|
0.97468354 0.98734177 0.87654321 0.98717949]
|
|
|
|
mean value: 0.9685679537683757
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.88888889 0.88888889 0.77777778 0.44444444 0.88888889
|
|
0.875 0.875 0.77777778 0.77777778]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_recall
|
|
value: [0.98734177 0.94936709 0.94936709 0.96202532 0.98734177 0.98734177
|
|
0.9625 0.975 0.89873418 0.97468354]
|
|
|
|
mean value: 0.9633702531645569
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.88888889 0.83333333 0.72222222 0.66666667 0.83333333
|
|
0.82638889 0.77083333 0.76388889 0.82638889]
|
|
|
|
mean value: 0.7965277777777778
|
|
|
|
key: train_roc_auc
|
|
value: [0.99367089 0.96202532 0.9556962 0.96202532 0.98101266 0.98734177
|
|
0.96859177 0.98117089 0.88686709 0.98109177]
|
|
|
|
mean value: 0.9659493670886076
|
|
|
|
key: test_jcc
|
|
value: [0.72727273 0.8 0.72727273 0.58333333 0.4 0.72727273
|
|
0.7 0.63636364 0.63636364 0.7 ]
|
|
|
|
mean value: 0.6637878787878788
|
|
|
|
key: train_jcc
|
|
value: [0.98734177 0.92592593 0.91463415 0.92682927 0.96296296 0.975
|
|
0.93902439 0.96296296 0.79775281 0.9625 ]
|
|
|
|
mean value: 0.9354934237870564
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01273727 0.00978827 0.00896478 0.0088222 0.00867915 0.00854111
|
|
0.00854087 0.00868607 0.00857282 0.00857306]
|
|
|
|
mean value: 0.009190559387207031
|
|
|
|
key: score_time
|
|
value: [0.01181936 0.00904059 0.00885773 0.0085566 0.00860143 0.00856304
|
|
0.00852323 0.00849891 0.00854683 0.00855064]
|
|
|
|
mean value: 0.008955836296081543
|
|
|
|
key: test_mcc
|
|
value: [ 0.56980288 0.55555556 0.47140452 0.4472136 0.11111111 0.77777778
|
|
0.42600643 0.18055556 0.6479516 -0.18055556]
|
|
|
|
mean value: 0.4006823471940616
|
|
|
|
key: train_mcc
|
|
value: [0.65955018 0.67174771 0.54433105 0.70937243 0.50155039 0.68359907
|
|
0.69017683 0.63906026 0.67502174 0.68608524]
|
|
|
|
mean value: 0.6460494891843794
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.77777778 0.72222222 0.72222222 0.55555556 0.88888889
|
|
0.70588235 0.58823529 0.82352941 0.41176471]
|
|
|
|
mean value: 0.6973856209150326
|
|
|
|
key: train_accuracy
|
|
value: [0.82911392 0.83544304 0.75316456 0.85443038 0.7278481 0.84177215
|
|
0.8427673 0.81761006 0.83647799 0.8427673 ]
|
|
|
|
mean value: 0.8181394793408169
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.77777778 0.76190476 0.70588235 0.55555556 0.88888889
|
|
0.61538462 0.58823529 0.84210526 0.44444444]
|
|
|
|
mean value: 0.6980178954172762
|
|
|
|
key: train_fscore
|
|
value: [0.83435583 0.83950617 0.79144385 0.85714286 0.77486911 0.8427673
|
|
0.83443709 0.82840237 0.84146341 0.8447205 ]
|
|
|
|
mean value: 0.8289108478500907
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.77777778 0.66666667 0.75 0.55555556 0.88888889
|
|
0.8 0.55555556 0.8 0.44444444]
|
|
|
|
mean value: 0.6966161616161616
|
|
|
|
key: train_precision
|
|
value: [0.80952381 0.81927711 0.68518519 0.84146341 0.66071429 0.8375
|
|
0.88732394 0.78651685 0.81176471 0.82926829]
|
|
|
|
mean value: 0.7968537599650998
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.77777778 0.88888889 0.66666667 0.55555556 0.88888889
|
|
0.5 0.625 0.88888889 0.44444444]
|
|
|
|
mean value: 0.7125
|
|
|
|
key: train_recall
|
|
value: [0.86075949 0.86075949 0.93670886 0.87341772 0.93670886 0.84810127
|
|
0.7875 0.875 0.87341772 0.86075949]
|
|
|
|
mean value: 0.8713132911392405
|
|
|
|
key: test_roc_auc
|
|
value: [0.77777778 0.77777778 0.72222222 0.72222222 0.55555556 0.88888889
|
|
0.69444444 0.59027778 0.81944444 0.40972222]
|
|
|
|
mean value: 0.6958333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.82911392 0.83544304 0.75316456 0.85443038 0.7278481 0.84177215
|
|
0.84311709 0.81724684 0.83670886 0.84287975]
|
|
|
|
mean value: 0.8181724683544304
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.63636364 0.61538462 0.54545455 0.38461538 0.8
|
|
0.44444444 0.41666667 0.72727273 0.28571429]
|
|
|
|
mean value: 0.5522582972582972
|
|
|
|
key: train_jcc
|
|
value: [0.71578947 0.72340426 0.65486726 0.75 0.63247863 0.72826087
|
|
0.71590909 0.70707071 0.72631579 0.7311828 ]
|
|
|
|
mean value: 0.7085278870836784
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00883412 0.00897193 0.00890326 0.00889039 0.00874805 0.00873041
|
|
0.00873351 0.00878525 0.00875974 0.00875354]
|
|
|
|
mean value: 0.008811020851135254
|
|
|
|
key: score_time
|
|
value: [0.0085361 0.00855994 0.00862718 0.00873303 0.00857043 0.00848055
|
|
0.00854826 0.00856686 0.00855255 0.00853872]
|
|
|
|
mean value: 0.008571362495422364
|
|
|
|
key: test_mcc
|
|
value: [0.11111111 0.2236068 0.4472136 0.56980288 0.26726124 0.33333333
|
|
0.16903085 0.16903085 0.52777778 0.18055556]
|
|
|
|
mean value: 0.2998723997129735
|
|
|
|
key: train_mcc
|
|
value: [0.62045203 0.58609427 0.62345811 0.58344823 0.54500286 0.55700665
|
|
0.53581684 0.5346519 0.61038989 0.59809901]
|
|
|
|
mean value: 0.5794419778966067
|
|
|
|
key: test_accuracy
|
|
value: [0.55555556 0.61111111 0.72222222 0.77777778 0.61111111 0.66666667
|
|
0.58823529 0.58823529 0.76470588 0.58823529]
|
|
|
|
mean value: 0.6473856209150327
|
|
|
|
key: train_accuracy
|
|
value: [0.81012658 0.79113924 0.81012658 0.79113924 0.7721519 0.77848101
|
|
0.7672956 0.7672956 0.80503145 0.79874214]
|
|
|
|
mean value: 0.7891529336836239
|
|
|
|
key: test_fscore
|
|
value: [0.55555556 0.63157895 0.73684211 0.75 0.46153846 0.66666667
|
|
0.53333333 0.53333333 0.77777778 0.58823529]
|
|
|
|
mean value: 0.6234861474954354
|
|
|
|
key: train_fscore
|
|
value: [0.80769231 0.77852349 0.8 0.79754601 0.77777778 0.77707006
|
|
0.76129032 0.7672956 0.8 0.79220779]
|
|
|
|
mean value: 0.7859403363639892
|
|
|
|
key: test_precision
|
|
value: [0.55555556 0.6 0.7 0.85714286 0.75 0.66666667
|
|
0.57142857 0.57142857 0.77777778 0.625 ]
|
|
|
|
mean value: 0.6675
|
|
|
|
key: train_precision
|
|
value: [0.81818182 0.82857143 0.84507042 0.77380952 0.75903614 0.78205128
|
|
0.78666667 0.7721519 0.81578947 0.81333333]
|
|
|
|
mean value: 0.7994661992145965
|
|
|
|
key: test_recall
|
|
value: [0.55555556 0.66666667 0.77777778 0.66666667 0.33333333 0.66666667
|
|
0.5 0.5 0.77777778 0.55555556]
|
|
|
|
mean value: 0.6
|
|
|
|
key: train_recall
|
|
value: [0.79746835 0.73417722 0.75949367 0.82278481 0.79746835 0.7721519
|
|
0.7375 0.7625 0.78481013 0.7721519 ]
|
|
|
|
mean value: 0.7740506329113924
|
|
|
|
key: test_roc_auc
|
|
value: [0.55555556 0.61111111 0.72222222 0.77777778 0.61111111 0.66666667
|
|
0.58333333 0.58333333 0.76388889 0.59027778]
|
|
|
|
mean value: 0.6465277777777778
|
|
|
|
key: train_roc_auc
|
|
value: [0.81012658 0.79113924 0.81012658 0.79113924 0.7721519 0.77848101
|
|
0.76748418 0.76732595 0.80490506 0.79857595]
|
|
|
|
mean value: 0.7891455696202532
|
|
|
|
key: test_jcc
|
|
value: [0.38461538 0.46153846 0.58333333 0.6 0.3 0.5
|
|
0.36363636 0.36363636 0.63636364 0.41666667]
|
|
|
|
mean value: 0.460979020979021
|
|
|
|
key: train_jcc
|
|
value: [0.67741935 0.63736264 0.66666667 0.66326531 0.63636364 0.63541667
|
|
0.61458333 0.62244898 0.66666667 0.65591398]
|
|
|
|
mean value: 0.6476107226107226
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00875807 0.00828743 0.00949121 0.00866389 0.00851631 0.00862336
|
|
0.0086143 0.0088408 0.00863338 0.00873017]
|
|
|
|
mean value: 0.00871589183807373
|
|
|
|
key: score_time
|
|
value: [0.00956774 0.00985765 0.01549387 0.01442194 0.01471853 0.01451778
|
|
0.01004767 0.00984645 0.00971317 0.00975966]
|
|
|
|
mean value: 0.011794447898864746
|
|
|
|
key: test_mcc
|
|
value: [ 0.47140452 0.4472136 0.34188173 -0.2236068 0.1490712 0.12403473
|
|
0.44970061 0.16903085 0.60858062 -0.05555556]
|
|
|
|
mean value: 0.24817555070683373
|
|
|
|
key: train_mcc
|
|
value: [0.54149615 0.55582283 0.54439964 0.53687549 0.56244099 0.54871762
|
|
0.55632843 0.51305743 0.54381155 0.56692517]
|
|
|
|
mean value: 0.5469875304507212
|
|
|
|
key: test_accuracy
|
|
value: [0.72222222 0.72222222 0.66666667 0.38888889 0.55555556 0.55555556
|
|
0.70588235 0.58823529 0.76470588 0.47058824]
|
|
|
|
mean value: 0.6140522875816994
|
|
|
|
key: train_accuracy
|
|
value: [0.76582278 0.7721519 0.76582278 0.76582278 0.77848101 0.7721519
|
|
0.77358491 0.75471698 0.7672956 0.77987421]
|
|
|
|
mean value: 0.769572486267017
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.70588235 0.7 0.42105263 0.33333333 0.42857143
|
|
0.73684211 0.53333333 0.71428571 0.47058824]
|
|
|
|
mean value: 0.5805793896505971
|
|
|
|
key: train_fscore
|
|
value: [0.74125874 0.74647887 0.73758865 0.74829932 0.76190476 0.75675676
|
|
0.75342466 0.74172185 0.74125874 0.75862069]
|
|
|
|
mean value: 0.7487313048122654
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.75 0.63636364 0.4 0.66666667 0.6
|
|
0.63636364 0.57142857 1. 0.5 ]
|
|
|
|
mean value: 0.6427489177489177
|
|
|
|
key: train_precision
|
|
value: [0.828125 0.84126984 0.83870968 0.80882353 0.82352941 0.8115942
|
|
0.83333333 0.78873239 0.828125 0.83333333]
|
|
|
|
mean value: 0.8235575723797082
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.66666667 0.77777778 0.44444444 0.22222222 0.33333333
|
|
0.875 0.5 0.55555556 0.44444444]
|
|
|
|
mean value: 0.5708333333333333
|
|
|
|
key: train_recall
|
|
value: [0.67088608 0.67088608 0.65822785 0.69620253 0.70886076 0.70886076
|
|
0.6875 0.7 0.67088608 0.69620253]
|
|
|
|
mean value: 0.6868512658227848
|
|
|
|
key: test_roc_auc
|
|
value: [0.72222222 0.72222222 0.66666667 0.38888889 0.55555556 0.55555556
|
|
0.71527778 0.58333333 0.77777778 0.47222222]
|
|
|
|
mean value: 0.6159722222222223
|
|
|
|
key: train_roc_auc
|
|
value: [0.76582278 0.7721519 0.76582278 0.76582278 0.77848101 0.7721519
|
|
0.77412975 0.75506329 0.76669304 0.77935127]
|
|
|
|
mean value: 0.7695490506329115
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.54545455 0.53846154 0.26666667 0.2 0.27272727
|
|
0.58333333 0.36363636 0.55555556 0.30769231]
|
|
|
|
mean value: 0.4248912198912199
|
|
|
|
key: train_jcc
|
|
value: [0.58888889 0.59550562 0.58426966 0.59782609 0.61538462 0.60869565
|
|
0.6043956 0.58947368 0.58888889 0.61111111]
|
|
|
|
mean value: 0.5984439812908946
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01182699 0.01088428 0.0105989 0.01070714 0.01067519 0.01060295
|
|
0.0107522 0.01132798 0.01073313 0.01074529]
|
|
|
|
mean value: 0.010885405540466308
|
|
|
|
key: score_time
|
|
value: [0.00978112 0.00912356 0.00916409 0.00913835 0.00921488 0.00921655
|
|
0.00910473 0.01053929 0.00921345 0.00933743]
|
|
|
|
mean value: 0.009383344650268554
|
|
|
|
key: test_mcc
|
|
value: [0.62017367 0.67082039 0.79772404 0.4472136 0.26726124 0.89442719
|
|
0.65277778 0.41666667 0.88741197 0.29012943]
|
|
|
|
mean value: 0.594460596832796
|
|
|
|
key: train_mcc
|
|
value: [0.77239946 0.73707642 0.77314359 0.75289162 0.81332571 0.76004189
|
|
0.79895529 0.75044075 0.74944601 0.77766186]
|
|
|
|
mean value: 0.768538260425022
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.83333333 0.88888889 0.72222222 0.61111111 0.94444444
|
|
0.82352941 0.70588235 0.94117647 0.64705882]
|
|
|
|
mean value: 0.7895424836601307
|
|
|
|
key: train_accuracy
|
|
value: [0.88607595 0.86708861 0.88607595 0.87341772 0.90506329 0.87974684
|
|
0.89937107 0.87421384 0.87421384 0.88679245]
|
|
|
|
mean value: 0.8832059549398933
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.84210526 0.9 0.73684211 0.46153846 0.94736842
|
|
0.82352941 0.70588235 0.94736842 0.7 ]
|
|
|
|
mean value: 0.7882816254952478
|
|
|
|
key: train_fscore
|
|
value: [0.8875 0.87272727 0.88888889 0.88095238 0.90909091 0.88198758
|
|
0.90123457 0.87951807 0.87654321 0.89156627]
|
|
|
|
mean value: 0.8870009144426378
|
|
|
|
key: test_precision
|
|
value: [0.69230769 0.8 0.81818182 0.7 0.75 0.9
|
|
0.77777778 0.66666667 0.9 0.63636364]
|
|
|
|
mean value: 0.7641297591297591
|
|
|
|
key: train_precision
|
|
value: [0.87654321 0.8372093 0.86746988 0.83146067 0.87209302 0.86585366
|
|
0.8902439 0.84883721 0.85542169 0.85057471]
|
|
|
|
mean value: 0.8595707258801916
|
|
|
|
key: test_recall
|
|
value: [1. 0.88888889 1. 0.77777778 0.33333333 1.
|
|
0.875 0.75 1. 0.77777778]
|
|
|
|
mean value: 0.8402777777777778
|
|
|
|
key: train_recall
|
|
value: [0.89873418 0.91139241 0.91139241 0.93670886 0.94936709 0.89873418
|
|
0.9125 0.9125 0.89873418 0.93670886]
|
|
|
|
mean value: 0.9166772151898734
|
|
|
|
key: test_roc_auc
|
|
value: [0.77777778 0.83333333 0.88888889 0.72222222 0.61111111 0.94444444
|
|
0.82638889 0.70833333 0.9375 0.63888889]
|
|
|
|
mean value: 0.7888888888888889
|
|
|
|
key: train_roc_auc
|
|
value: [0.88607595 0.86708861 0.88607595 0.87341772 0.90506329 0.87974684
|
|
0.89928797 0.87397152 0.87436709 0.88710443]
|
|
|
|
mean value: 0.8832199367088608
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.72727273 0.81818182 0.58333333 0.3 0.9
|
|
0.7 0.54545455 0.9 0.53846154]
|
|
|
|
mean value: 0.6705011655011655
|
|
|
|
key: train_jcc
|
|
value: [0.79775281 0.77419355 0.8 0.78723404 0.83333333 0.78888889
|
|
0.82022472 0.78494624 0.78021978 0.80434783]
|
|
|
|
mean value: 0.7971141184118274
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.82319236 0.70165181 0.69482183 0.65259123 0.64576364 0.75334024
|
|
0.64439893 0.64480519 0.76589203 0.63085938]
|
|
|
|
mean value: 0.6957316637039185
|
|
|
|
key: score_time
|
|
value: [0.01468158 0.02285886 0.0164628 0.01601267 0.01622725 0.01623726
|
|
0.01622391 0.01489711 0.01475215 0.01502585]
|
|
|
|
mean value: 0.016337943077087403
|
|
|
|
key: test_mcc
|
|
value: [0.67082039 0.55555556 0.4472136 0.4472136 0.3721042 0.67082039
|
|
0.41666667 0.18055556 0.29166667 0.18055556]
|
|
|
|
mean value: 0.4233172181267415
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.77777778 0.72222222 0.72222222 0.66666667 0.83333333
|
|
0.70588235 0.58823529 0.64705882 0.58823529]
|
|
|
|
mean value: 0.7084967320261438
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.84210526 0.77777778 0.73684211 0.73684211 0.57142857 0.84210526
|
|
0.70588235 0.58823529 0.66666667 0.58823529]
|
|
|
|
mean value: 0.7056120693891592
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8 0.77777778 0.7 0.7 0.8 0.8
|
|
0.66666667 0.55555556 0.66666667 0.625 ]
|
|
|
|
mean value: 0.7091666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.77777778 0.77777778 0.77777778 0.44444444 0.88888889
|
|
0.75 0.625 0.66666667 0.55555556]
|
|
|
|
mean value: 0.7152777777777778
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.77777778 0.72222222 0.72222222 0.66666667 0.83333333
|
|
0.70833333 0.59027778 0.64583333 0.59027778]
|
|
|
|
mean value: 0.7090277777777778
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.72727273 0.63636364 0.58333333 0.58333333 0.4 0.72727273
|
|
0.54545455 0.41666667 0.5 0.41666667]
|
|
|
|
mean value: 0.5536363636363637
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01664901 0.01520634 0.01341438 0.0132556 0.01205277 0.01345849
|
|
0.01324964 0.01256585 0.01373625 0.01405025]
|
|
|
|
mean value: 0.013763856887817384
|
|
|
|
key: score_time
|
|
value: [0.01184344 0.00920844 0.00879431 0.0087707 0.00871181 0.00874615
|
|
0.0091579 0.00878787 0.00946546 0.00946307]
|
|
|
|
mean value: 0.009294915199279784
|
|
|
|
key: test_mcc
|
|
value: [0.67082039 1. 1. 0.79772404 0.77777778 1.
|
|
0.78881064 0.88888889 0.76388889 0.78334945]
|
|
|
|
mean value: 0.8471260073570214
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 1. 1. 0.88888889 0.88888889 1.
|
|
0.88235294 0.94117647 0.88235294 0.88235294]
|
|
|
|
mean value: 0.9199346405228758
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.84210526 1. 1. 0.875 0.88888889 1.
|
|
0.88888889 0.94117647 0.88888889 0.9 ]
|
|
|
|
mean value: 0.9224948400412797
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8 1. 1. 1. 0.88888889 1.
|
|
0.8 0.88888889 0.88888889 0.81818182]
|
|
|
|
mean value: 0.9084848484848485
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.88888889 1. 1. 0.77777778 0.88888889 1.
|
|
1. 1. 0.88888889 1. ]
|
|
|
|
mean value: 0.9444444444444444
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 1. 1. 0.88888889 0.88888889 1.
|
|
0.88888889 0.94444444 0.88194444 0.875 ]
|
|
|
|
mean value: 0.9201388888888888
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.72727273 1. 1. 0.77777778 0.8 1.
|
|
0.8 0.88888889 0.8 0.81818182]
|
|
|
|
mean value: 0.8612121212121212
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.10169578 0.10507154 0.0948379 0.09157467 0.09187484 0.09903312
|
|
0.09998369 0.0997653 0.10069346 0.0938766 ]
|
|
|
|
mean value: 0.09784069061279296
|
|
|
|
key: score_time
|
|
value: [0.01945329 0.0198369 0.01804137 0.01763129 0.01748037 0.01849914
|
|
0.01903343 0.0188868 0.01890397 0.01792884]
|
|
|
|
mean value: 0.018569540977478028
|
|
|
|
key: test_mcc
|
|
value: [0.62017367 0.77777778 0.77777778 0.34188173 0.53452248 0.67082039
|
|
0.78881064 0.16735967 0.88888889 0.29166667]
|
|
|
|
mean value: 0.5859679698606269
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.88888889 0.88888889 0.66666667 0.72222222 0.83333333
|
|
0.88235294 0.58823529 0.94117647 0.64705882]
|
|
|
|
mean value: 0.7836601307189542
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.88888889 0.88888889 0.7 0.61538462 0.84210526
|
|
0.88888889 0.46153846 0.94117647 0.66666667]
|
|
|
|
mean value: 0.7711719962184358
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.69230769 0.88888889 0.88888889 0.63636364 1. 0.8
|
|
0.8 0.6 1. 0.66666667]
|
|
|
|
mean value: 0.7973115773115773
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.88888889 0.88888889 0.77777778 0.44444444 0.88888889
|
|
1. 0.375 0.88888889 0.66666667]
|
|
|
|
mean value: 0.7819444444444444
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.77777778 0.88888889 0.88888889 0.66666667 0.72222222 0.83333333
|
|
0.88888889 0.57638889 0.94444444 0.64583333]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.8 0.8 0.53846154 0.44444444 0.72727273
|
|
0.8 0.3 0.88888889 0.5 ]
|
|
|
|
mean value: 0.6491375291375291
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01018333 0.00994539 0.01004744 0.0099349 0.01006651 0.00992036
|
|
0.01011968 0.00942922 0.01031256 0.01003194]
|
|
|
|
mean value: 0.00999913215637207
|
|
|
|
key: score_time
|
|
value: [0.00951838 0.00945115 0.00948143 0.00938463 0.00939989 0.00944781
|
|
0.00944304 0.00938702 0.00942564 0.00945687]
|
|
|
|
mean value: 0.009439587593078613
|
|
|
|
key: test_mcc
|
|
value: [ 0.23570226 0.55555556 0.34188173 -0.12403473 0.23570226 0.2236068
|
|
-0.07042952 0.04351941 0.52777778 0.07042952]
|
|
|
|
mean value: 0.2039711060652974
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.61111111 0.77777778 0.66666667 0.44444444 0.61111111 0.61111111
|
|
0.47058824 0.52941176 0.76470588 0.52941176]
|
|
|
|
mean value: 0.6016339869281045
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.77777778 0.625 0.54545455 0.53333333 0.63157895
|
|
0.4 0.42857143 0.77777778 0.5 ]
|
|
|
|
mean value: 0.5886160476949951
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.58333333 0.77777778 0.71428571 0.46153846 0.66666667 0.6
|
|
0.42857143 0.5 0.77777778 0.57142857]
|
|
|
|
mean value: 0.6081379731379731
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.77777778 0.55555556 0.66666667 0.44444444 0.66666667
|
|
0.375 0.375 0.77777778 0.44444444]
|
|
|
|
mean value: 0.5861111111111111
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.61111111 0.77777778 0.66666667 0.44444444 0.61111111 0.61111111
|
|
0.46527778 0.52083333 0.76388889 0.53472222]
|
|
|
|
mean value: 0.6006944444444444
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.63636364 0.45454545 0.375 0.36363636 0.46153846
|
|
0.25 0.27272727 0.63636364 0.33333333]
|
|
|
|
mean value: 0.42835081585081586
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.24913812 1.27156734 1.20463991 1.19117212 1.18857145 1.18892694
|
|
1.18293142 1.16849566 1.18670654 1.27968574]
|
|
|
|
mean value: 1.211183524131775
|
|
|
|
key: score_time
|
|
value: [0.09751105 0.09638333 0.08782911 0.09188533 0.08863258 0.08907223
|
|
0.08769917 0.08837461 0.08824849 0.09611225]
|
|
|
|
mean value: 0.09117481708526612
|
|
|
|
key: test_mcc
|
|
value: [0.67082039 0.89442719 1. 0.70710678 0.70710678 1.
|
|
1. 0.29166667 1. 0.76388889]
|
|
|
|
mean value: 0.8035016702178504
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.94444444 1. 0.83333333 0.83333333 1.
|
|
1. 0.64705882 1. 0.88235294]
|
|
|
|
mean value: 0.8973856209150327
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.84210526 0.94736842 1. 0.8 0.8 1.
|
|
1. 0.625 1. 0.88888889]
|
|
|
|
mean value: 0.8903362573099416
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8 0.9 1. 1. 1. 1.
|
|
1. 0.625 1. 0.88888889]
|
|
|
|
mean value: 0.9213888888888889
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.88888889 1. 1. 0.66666667 0.66666667 1.
|
|
1. 0.625 1. 0.88888889]
|
|
|
|
mean value: 0.8736111111111111
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.94444444 1. 0.83333333 0.83333333 1.
|
|
1. 0.64583333 1. 0.88194444]
|
|
|
|
mean value: 0.8972222222222223
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
key: test_jcc
|
|
value: [0.72727273 0.9 1. 0.66666667 0.66666667 1.
|
|
1. 0.45454545 1. 0.8 ]
|
|
|
|
mean value: 0.8215151515151515
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.96250749 0.92383838 0.92812443 0.87012911 0.93210411 0.93901181
|
|
0.90484762 0.91266036 0.93102837 0.94302392]
|
|
|
|
mean value: 0.9247275590896606
|
|
|
|
key: score_time
|
|
value: [0.19898915 0.1324091 0.23657775 0.20810533 0.24690914 0.24799681
|
|
0.2273283 0.20355201 0.23310709 0.23432255]
|
|
|
|
mean value: 0.21692972183227538
|
|
|
|
key: test_mcc
|
|
value: [0.79772404 1. 1. 0.56980288 0.70710678 0.89442719
|
|
1. 0.29166667 1. 0.54935027]
|
|
|
|
mean value: 0.7810077821942321
|
|
|
|
key: train_mcc
|
|
value: [0.9621024 0.94967147 0.94967147 0.94967147 0.94967147 0.94967147
|
|
0.94997636 0.94968354 0.9499921 0.94968354]
|
|
|
|
mean value: 0.9509795300752732
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 1. 1. 0.77777778 0.83333333 0.94444444
|
|
1. 0.64705882 1. 0.76470588]
|
|
|
|
mean value: 0.8856209150326797
|
|
|
|
key: train_accuracy
|
|
value: [0.98101266 0.97468354 0.97468354 0.97468354 0.97468354 0.97468354
|
|
0.97484277 0.97484277 0.97484277 0.97484277]
|
|
|
|
mean value: 0.9753801448929226
|
|
|
|
key: test_fscore
|
|
value: [0.9 1. 1. 0.75 0.8 0.94117647
|
|
1. 0.625 1. 0.75 ]
|
|
|
|
mean value: 0.8766176470588235
|
|
|
|
key: train_fscore
|
|
value: [0.98113208 0.975 0.975 0.975 0.975 0.975
|
|
0.97530864 0.975 0.975 0.97468354]
|
|
|
|
mean value: 0.9756124261750804
|
|
|
|
key: test_precision
|
|
value: [0.81818182 1. 1. 0.85714286 1. 1.
|
|
1. 0.625 1. 0.85714286]
|
|
|
|
mean value: 0.9157467532467533
|
|
|
|
key: train_precision
|
|
value: [0.975 0.96296296 0.96296296 0.96296296 0.96296296 0.96296296
|
|
0.96341463 0.975 0.96296296 0.97468354]
|
|
|
|
mean value: 0.9665875956227916
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.66666667 0.66666667 0.88888889
|
|
1. 0.625 1. 0.66666667]
|
|
|
|
mean value: 0.8513888888888889
|
|
|
|
key: train_recall
|
|
value: [0.98734177 0.98734177 0.98734177 0.98734177 0.98734177 0.98734177
|
|
0.9875 0.975 0.98734177 0.97468354]
|
|
|
|
mean value: 0.9848575949367089
|
|
|
|
key: test_roc_auc
|
|
value: [0.88888889 1. 1. 0.77777778 0.83333333 0.94444444
|
|
1. 0.64583333 1. 0.77083333]
|
|
|
|
mean value: 0.8861111111111111
|
|
|
|
key: train_roc_auc
|
|
value: [0.98101266 0.97468354 0.97468354 0.97468354 0.97468354 0.97468354
|
|
0.97476266 0.97484177 0.97492089 0.97484177]
|
|
|
|
mean value: 0.975379746835443
|
|
|
|
key: test_jcc
|
|
value: [0.81818182 1. 1. 0.6 0.66666667 0.88888889
|
|
1. 0.45454545 1. 0.6 ]
|
|
|
|
mean value: 0.8028282828282828
|
|
|
|
key: train_jcc
|
|
value: [0.96296296 0.95121951 0.95121951 0.95121951 0.95121951 0.95121951
|
|
0.95180723 0.95121951 0.95121951 0.95061728]
|
|
|
|
mean value: 0.9523924061195096
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02123356 0.00871611 0.00887299 0.00869632 0.00875688 0.00867677
|
|
0.00881886 0.00880957 0.00868821 0.00883961]
|
|
|
|
mean value: 0.010010886192321777
|
|
|
|
key: score_time
|
|
value: [0.01431727 0.0084784 0.00853419 0.00855255 0.00838017 0.00845575
|
|
0.00846887 0.00848198 0.0084846 0.00844526]
|
|
|
|
mean value: 0.009059906005859375
|
|
|
|
key: test_mcc
|
|
value: [0.11111111 0.2236068 0.4472136 0.56980288 0.26726124 0.33333333
|
|
0.16903085 0.16903085 0.52777778 0.18055556]
|
|
|
|
mean value: 0.2998723997129735
|
|
|
|
key: train_mcc
|
|
value: [0.62045203 0.58609427 0.62345811 0.58344823 0.54500286 0.55700665
|
|
0.53581684 0.5346519 0.61038989 0.59809901]
|
|
|
|
mean value: 0.5794419778966067
|
|
|
|
key: test_accuracy
|
|
value: [0.55555556 0.61111111 0.72222222 0.77777778 0.61111111 0.66666667
|
|
0.58823529 0.58823529 0.76470588 0.58823529]
|
|
|
|
mean value: 0.6473856209150327
|
|
|
|
key: train_accuracy
|
|
value: [0.81012658 0.79113924 0.81012658 0.79113924 0.7721519 0.77848101
|
|
0.7672956 0.7672956 0.80503145 0.79874214]
|
|
|
|
mean value: 0.7891529336836239
|
|
|
|
key: test_fscore
|
|
value: [0.55555556 0.63157895 0.73684211 0.75 0.46153846 0.66666667
|
|
0.53333333 0.53333333 0.77777778 0.58823529]
|
|
|
|
mean value: 0.6234861474954354
|
|
|
|
key: train_fscore
|
|
value: [0.80769231 0.77852349 0.8 0.79754601 0.77777778 0.77707006
|
|
0.76129032 0.7672956 0.8 0.79220779]
|
|
|
|
mean value: 0.7859403363639892
|
|
|
|
key: test_precision
|
|
value: [0.55555556 0.6 0.7 0.85714286 0.75 0.66666667
|
|
0.57142857 0.57142857 0.77777778 0.625 ]
|
|
|
|
mean value: 0.6675
|
|
|
|
key: train_precision
|
|
value: [0.81818182 0.82857143 0.84507042 0.77380952 0.75903614 0.78205128
|
|
0.78666667 0.7721519 0.81578947 0.81333333]
|
|
|
|
mean value: 0.7994661992145965
|
|
|
|
key: test_recall
|
|
value: [0.55555556 0.66666667 0.77777778 0.66666667 0.33333333 0.66666667
|
|
0.5 0.5 0.77777778 0.55555556]
|
|
|
|
mean value: 0.6
|
|
|
|
key: train_recall
|
|
value: [0.79746835 0.73417722 0.75949367 0.82278481 0.79746835 0.7721519
|
|
0.7375 0.7625 0.78481013 0.7721519 ]
|
|
|
|
mean value: 0.7740506329113924
|
|
|
|
key: test_roc_auc
|
|
value: [0.55555556 0.61111111 0.72222222 0.77777778 0.61111111 0.66666667
|
|
0.58333333 0.58333333 0.76388889 0.59027778]
|
|
|
|
mean value: 0.6465277777777778
|
|
|
|
key: train_roc_auc
|
|
value: [0.81012658 0.79113924 0.81012658 0.79113924 0.7721519 0.77848101
|
|
0.76748418 0.76732595 0.80490506 0.79857595]
|
|
|
|
mean value: 0.7891455696202532
|
|
|
|
key: test_jcc
|
|
value: [0.38461538 0.46153846 0.58333333 0.6 0.3 0.5
|
|
0.36363636 0.36363636 0.63636364 0.41666667]
|
|
|
|
mean value: 0.460979020979021
|
|
|
|
key: train_jcc
|
|
value: [0.67741935 0.63736264 0.66666667 0.66326531 0.63636364 0.63541667
|
|
0.61458333 0.62244898 0.66666667 0.65591398]
|
|
|
|
mean value: 0.6476107226107226
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.05445504 0.05187917 0.05913138 0.05450749 0.05523133 0.05698442
|
|
0.05438495 0.2290132 0.04490089 0.04462194]
|
|
|
|
mean value: 0.07051098346710205
|
|
|
|
key: score_time
|
|
value: [0.01012802 0.01056314 0.01021743 0.01016593 0.01083326 0.01125741
|
|
0.01048422 0.01154351 0.01079202 0.01051164]
|
|
|
|
mean value: 0.010649657249450684
|
|
|
|
key: test_mcc
|
|
value: [0.77777778 1. 1. 0.79772404 0.77777778 1.
|
|
0.88888889 0.88888889 1. 0.88741197]
|
|
|
|
mean value: 0.9018469336015741
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 1. 1. 0.88888889 0.88888889 1.
|
|
0.94117647 0.94117647 1. 0.94117647]
|
|
|
|
mean value: 0.9490196078431372
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 1. 1. 0.875 0.88888889 1.
|
|
0.94117647 0.94117647 1. 0.94736842]
|
|
|
|
mean value: 0.948249914000688
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88888889 1. 1. 1. 0.88888889 1.
|
|
0.88888889 0.88888889 1. 0.9 ]
|
|
|
|
mean value: 0.9455555555555555
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.88888889 1. 1. 0.77777778 0.88888889 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9555555555555555
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.88888889 1. 1. 0.88888889 0.88888889 1.
|
|
0.94444444 0.94444444 1. 0.9375 ]
|
|
|
|
mean value: 0.9493055555555555
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8 1. 1. 0.77777778 0.8 1.
|
|
0.88888889 0.88888889 1. 0.9 ]
|
|
|
|
mean value: 0.9055555555555556
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0296216 0.05306625 0.05170012 0.05277443 0.05263186 0.05192494
|
|
0.05169201 0.0526433 0.05250549 0.05197215]
|
|
|
|
mean value: 0.05005321502685547
|
|
|
|
key: score_time
|
|
value: [0.02377701 0.02253151 0.01830482 0.02136779 0.02420402 0.02055812
|
|
0.02063036 0.02410078 0.02347684 0.02271819]
|
|
|
|
mean value: 0.022166943550109862
|
|
|
|
key: test_mcc
|
|
value: [0.2236068 0.67082039 0.67082039 0.47140452 0.34188173 0.89442719
|
|
0.78881064 0.52777778 0.16903085 0.29166667]
|
|
|
|
mean value: 0.5050246958556477
|
|
|
|
key: train_mcc
|
|
value: [1. 0.9621024 0.97468354 0.98742088 0.98742088 0.97468354
|
|
0.96234177 1. 0.97484177 0.96234177]
|
|
|
|
mean value: 0.9785836569605925
|
|
|
|
key: test_accuracy
|
|
value: [0.61111111 0.83333333 0.83333333 0.72222222 0.66666667 0.94444444
|
|
0.88235294 0.76470588 0.58823529 0.64705882]
|
|
|
|
mean value: 0.7493464052287582
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98101266 0.98734177 0.99367089 0.99367089 0.98734177
|
|
0.98113208 1. 0.98742138 0.98113208]
|
|
|
|
mean value: 0.9892723509274739
|
|
|
|
key: test_fscore
|
|
value: [0.63157895 0.84210526 0.82352941 0.76190476 0.625 0.94117647
|
|
0.88888889 0.75 0.63157895 0.66666667]
|
|
|
|
mean value: 0.7562429357707996
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98089172 0.98734177 0.99363057 0.99371069 0.98734177
|
|
0.98113208 1. 0.98734177 0.98113208]
|
|
|
|
mean value: 0.9892522452216623
|
|
|
|
key: test_precision
|
|
value: [0.6 0.8 0.875 0.66666667 0.71428571 1.
|
|
0.8 0.75 0.6 0.66666667]
|
|
|
|
mean value: 0.7472619047619048
|
|
|
|
key: train_precision
|
|
value: [1. 0.98717949 0.98734177 1. 0.9875 0.98734177
|
|
0.98734177 1. 0.98734177 0.975 ]
|
|
|
|
mean value: 0.9899046575787083
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.88888889 0.77777778 0.88888889 0.55555556 0.88888889
|
|
1. 0.75 0.66666667 0.66666667]
|
|
|
|
mean value: 0.775
|
|
|
|
key: train_recall
|
|
value: [1. 0.97468354 0.98734177 0.98734177 1. 0.98734177
|
|
0.975 1. 0.98734177 0.98734177]
|
|
|
|
mean value: 0.9886392405063291
|
|
|
|
key: test_roc_auc
|
|
value: [0.61111111 0.83333333 0.83333333 0.72222222 0.66666667 0.94444444
|
|
0.88888889 0.76388889 0.58333333 0.64583333]
|
|
|
|
mean value: 0.7493055555555556
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98101266 0.98734177 0.99367089 0.99367089 0.98734177
|
|
0.98117089 1. 0.98742089 0.98117089]
|
|
|
|
mean value: 0.9892800632911393
|
|
|
|
key: test_jcc
|
|
value: [0.46153846 0.72727273 0.7 0.61538462 0.45454545 0.88888889
|
|
0.8 0.6 0.46153846 0.5 ]
|
|
|
|
mean value: 0.6209168609168609
|
|
|
|
key: train_jcc
|
|
value: [1. 0.9625 0.975 0.98734177 0.9875 0.975
|
|
0.96296296 1. 0.975 0.96296296]
|
|
|
|
mean value: 0.9788267698077825
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02158642 0.01039886 0.00884795 0.00946593 0.00987315 0.00987744
|
|
0.01004529 0.00951934 0.00998187 0.01012397]
|
|
|
|
mean value: 0.010972023010253906
|
|
|
|
key: score_time
|
|
value: [0.00903487 0.00969934 0.00854087 0.00920844 0.009238 0.00936532
|
|
0.00933886 0.00936294 0.00932789 0.00940394]
|
|
|
|
mean value: 0.009252047538757325
|
|
|
|
key: test_mcc
|
|
value: [0.47140452 0.67082039 0.79772404 0.4472136 0.3721042 1.
|
|
0.52777778 0.52777778 0.52777778 0.16903085]
|
|
|
|
mean value: 0.5511630932805054
|
|
|
|
key: train_mcc
|
|
value: [0.62045203 0.65955018 0.65870297 0.67632637 0.62104977 0.63336824
|
|
0.58739809 0.61006489 0.63574489 0.66044304]
|
|
|
|
mean value: 0.6363100454104343
|
|
|
|
key: test_accuracy
|
|
value: [0.72222222 0.83333333 0.88888889 0.72222222 0.66666667 1.
|
|
0.76470588 0.76470588 0.76470588 0.58823529]
|
|
|
|
mean value: 0.7715686274509803
|
|
|
|
key: train_accuracy
|
|
value: [0.81012658 0.82911392 0.82911392 0.83544304 0.81012658 0.8164557
|
|
0.79245283 0.80503145 0.81761006 0.83018868]
|
|
|
|
mean value: 0.8175662765703368
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.84210526 0.875 0.73684211 0.57142857 1.
|
|
0.75 0.75 0.77777778 0.63157895]
|
|
|
|
mean value: 0.7696637426900584
|
|
|
|
key: train_fscore
|
|
value: [0.80769231 0.83435583 0.83229814 0.8452381 0.81481481 0.81987578
|
|
0.78431373 0.80745342 0.81987578 0.83018868]
|
|
|
|
mean value: 0.8196106556291618
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.8 1. 0.7 0.8 1.
|
|
0.75 0.75 0.77777778 0.6 ]
|
|
|
|
mean value: 0.7844444444444445
|
|
|
|
key: train_precision
|
|
value: [0.81818182 0.80952381 0.81707317 0.79775281 0.79518072 0.80487805
|
|
0.82191781 0.80246914 0.80487805 0.825 ]
|
|
|
|
mean value: 0.8096855371900288
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.88888889 0.77777778 0.77777778 0.44444444 1.
|
|
0.75 0.75 0.77777778 0.66666667]
|
|
|
|
mean value: 0.7722222222222223
|
|
|
|
key: train_recall
|
|
value: [0.79746835 0.86075949 0.84810127 0.89873418 0.83544304 0.83544304
|
|
0.75 0.8125 0.83544304 0.83544304]
|
|
|
|
mean value: 0.8309335443037975
|
|
|
|
key: test_roc_auc
|
|
value: [0.72222222 0.83333333 0.88888889 0.72222222 0.66666667 1.
|
|
0.76388889 0.76388889 0.76388889 0.58333333]
|
|
|
|
mean value: 0.7708333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.81012658 0.82911392 0.82911392 0.83544304 0.81012658 0.8164557
|
|
0.79272152 0.80498418 0.81772152 0.83022152]
|
|
|
|
mean value: 0.8176028481012658
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.72727273 0.77777778 0.58333333 0.4 1.
|
|
0.6 0.6 0.63636364 0.46153846]
|
|
|
|
mean value: 0.6401670551670552
|
|
|
|
key: train_jcc
|
|
value: [0.67741935 0.71578947 0.71276596 0.73195876 0.6875 0.69473684
|
|
0.64516129 0.67708333 0.69473684 0.70967742]
|
|
|
|
mean value: 0.6946829276077606
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01234007 0.01520658 0.01421547 0.01622844 0.01516342 0.01555896
|
|
0.01505685 0.01540637 0.01545596 0.01503229]
|
|
|
|
mean value: 0.014966440200805665
|
|
|
|
key: score_time
|
|
value: [0.00947547 0.01168132 0.01160622 0.01176691 0.01161003 0.01183724
|
|
0.01163387 0.01220989 0.01203275 0.01214504]
|
|
|
|
mean value: 0.011599874496459961
|
|
|
|
key: test_mcc
|
|
value: [0.67082039 0.77777778 0.70710678 0.4472136 0.62017367 0.79772404
|
|
0.54935027 0.69631062 0.49099025 0.41666667]
|
|
|
|
mean value: 0.6174134064971705
|
|
|
|
key: train_mcc
|
|
value: [0.93678391 0.93678391 0.75178998 0.9243469 0.94967147 0.91322332
|
|
0.91202532 0.81121795 0.5472547 0.87527844]
|
|
|
|
mean value: 0.8558375901294395
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.88888889 0.83333333 0.72222222 0.77777778 0.88888889
|
|
0.76470588 0.82352941 0.70588235 0.70588235]
|
|
|
|
mean value: 0.7944444444444444
|
|
|
|
key: train_accuracy
|
|
value: [0.96835443 0.96835443 0.86708861 0.96202532 0.97468354 0.9556962
|
|
0.95597484 0.89937107 0.72955975 0.93710692]
|
|
|
|
mean value: 0.9218215110261921
|
|
|
|
key: test_fscore
|
|
value: [0.84210526 0.88888889 0.85714286 0.73684211 0.71428571 0.875
|
|
0.77777778 0.84210526 0.7826087 0.70588235]
|
|
|
|
mean value: 0.8022638918267536
|
|
|
|
key: train_fscore
|
|
value: [0.96815287 0.96815287 0.88 0.9625 0.97435897 0.95424837
|
|
0.95597484 0.90804598 0.78606965 0.93506494]
|
|
|
|
mean value: 0.9292568479441141
|
|
|
|
key: test_precision
|
|
value: [0.8 0.88888889 0.75 0.7 1. 1.
|
|
0.7 0.72727273 0.64285714 0.75 ]
|
|
|
|
mean value: 0.7959018759018759
|
|
|
|
key: train_precision
|
|
value: [0.97435897 0.97435897 0.80208333 0.95061728 0.98701299 0.98648649
|
|
0.96202532 0.84042553 0.64754098 0.96 ]
|
|
|
|
mean value: 0.908490987147852
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.88888889 1. 0.77777778 0.55555556 0.77777778
|
|
0.875 1. 1. 0.66666667]
|
|
|
|
mean value: 0.8430555555555556
|
|
|
|
key: train_recall
|
|
value: [0.96202532 0.96202532 0.97468354 0.97468354 0.96202532 0.92405063
|
|
0.95 0.9875 1. 0.91139241]
|
|
|
|
mean value: 0.9608386075949367
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.88888889 0.83333333 0.72222222 0.77777778 0.88888889
|
|
0.77083333 0.83333333 0.6875 0.70833333]
|
|
|
|
mean value: 0.7944444444444445
|
|
|
|
key: train_roc_auc
|
|
value: [0.96835443 0.96835443 0.86708861 0.96202532 0.97468354 0.9556962
|
|
0.95601266 0.89881329 0.73125 0.9369462 ]
|
|
|
|
mean value: 0.9219224683544304
|
|
|
|
key: test_jcc
|
|
value: [0.72727273 0.8 0.75 0.58333333 0.55555556 0.77777778
|
|
0.63636364 0.72727273 0.64285714 0.54545455]
|
|
|
|
mean value: 0.6745887445887446
|
|
|
|
key: train_jcc
|
|
value: [0.9382716 0.9382716 0.78571429 0.92771084 0.95 0.9125
|
|
0.91566265 0.83157895 0.64754098 0.87804878]
|
|
|
|
mean value: 0.8725299701029515
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01438379 0.01469326 0.01489258 0.01415277 0.01420665 0.01426673
|
|
0.01395035 0.01545525 0.01461434 0.01494837]
|
|
|
|
mean value: 0.014556407928466797
|
|
|
|
key: score_time
|
|
value: [0.00999117 0.01173043 0.01184225 0.01177931 0.01166821 0.01177931
|
|
0.01173019 0.01165199 0.01175547 0.01169562]
|
|
|
|
mean value: 0.011562395095825195
|
|
|
|
key: test_mcc
|
|
value: [0.1490712 0.67082039 0.70710678 0.4472136 0.3721042 0.56980288
|
|
0.65277778 0.54935027 0.78334945 0.40849122]
|
|
|
|
mean value: 0.531008777277298
|
|
|
|
key: train_mcc
|
|
value: [0.56273143 0.9621024 0.89188259 0.93678391 0.85805812 0.75637877
|
|
0.81121795 0.8885037 0.9256747 0.90401404]
|
|
|
|
mean value: 0.8497347611583999
|
|
|
|
key: test_accuracy
|
|
value: [0.55555556 0.83333333 0.83333333 0.72222222 0.66666667 0.77777778
|
|
0.82352941 0.76470588 0.88235294 0.70588235]
|
|
|
|
mean value: 0.7565359477124183
|
|
|
|
key: train_accuracy
|
|
value: [0.74050633 0.98101266 0.94303797 0.96835443 0.92405063 0.86708861
|
|
0.89937107 0.94339623 0.96226415 0.94968553]
|
|
|
|
mean value: 0.9178767614043468
|
|
|
|
key: test_fscore
|
|
value: [0.33333333 0.84210526 0.85714286 0.73684211 0.57142857 0.75
|
|
0.82352941 0.77777778 0.9 0.73684211]
|
|
|
|
mean value: 0.7329001425131456
|
|
|
|
key: train_fscore
|
|
value: [0.64957265 0.98113208 0.94610778 0.96815287 0.92941176 0.84892086
|
|
0.90804598 0.94545455 0.96103896 0.95180723]
|
|
|
|
mean value: 0.9089644716153422
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.8 0.75 0.7 0.8 0.85714286
|
|
0.77777778 0.7 0.81818182 0.7 ]
|
|
|
|
mean value: 0.756976911976912
|
|
|
|
key: train_precision
|
|
value: [1. 0.975 0.89772727 0.97435897 0.86813187 0.98333333
|
|
0.84042553 0.91764706 0.98666667 0.90804598]
|
|
|
|
mean value: 0.9351336682968032
|
|
|
|
key: test_recall
|
|
value: [0.22222222 0.88888889 1. 0.77777778 0.44444444 0.66666667
|
|
0.875 0.875 1. 0.77777778]
|
|
|
|
mean value: 0.7527777777777778
|
|
|
|
key: train_recall
|
|
value: [0.48101266 0.98734177 1. 0.96202532 1. 0.74683544
|
|
0.9875 0.975 0.93670886 1. ]
|
|
|
|
mean value: 0.9076424050632912
|
|
|
|
key: test_roc_auc
|
|
value: [0.55555556 0.83333333 0.83333333 0.72222222 0.66666667 0.77777778
|
|
0.82638889 0.77083333 0.875 0.70138889]
|
|
|
|
mean value: 0.75625
|
|
|
|
key: train_roc_auc
|
|
value: [0.74050633 0.98101266 0.94303797 0.96835443 0.92405063 0.86708861
|
|
0.89881329 0.9431962 0.96210443 0.95 ]
|
|
|
|
mean value: 0.9178164556962025
|
|
|
|
key: test_jcc
|
|
value: [0.2 0.72727273 0.75 0.58333333 0.4 0.6
|
|
0.7 0.63636364 0.81818182 0.58333333]
|
|
|
|
mean value: 0.5998484848484849
|
|
|
|
key: train_jcc
|
|
value: [0.48101266 0.96296296 0.89772727 0.9382716 0.86813187 0.7375
|
|
0.83157895 0.89655172 0.925 0.90804598]
|
|
|
|
mean value: 0.844678301550607
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.11545205 0.10431743 0.10553479 0.10573816 0.10540199 0.10588789
|
|
0.10381937 0.10629654 0.10485244 0.10286975]
|
|
|
|
mean value: 0.10601704120635987
|
|
|
|
key: score_time
|
|
value: [0.01625061 0.01548195 0.01487517 0.0151825 0.01617122 0.01584601
|
|
0.01527643 0.0148437 0.01535702 0.01589417]
|
|
|
|
mean value: 0.015517878532409667
|
|
|
|
key: test_mcc
|
|
value: [0.77777778 1. 1. 0.79772404 0.77777778 0.89442719
|
|
1. 0.88888889 1. 0.88741197]
|
|
|
|
mean value: 0.9024007638126769
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 1. 1. 0.88888889 0.88888889 0.94444444
|
|
1. 0.94117647 1. 0.94117647]
|
|
|
|
mean value: 0.9493464052287581
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 1. 1. 0.875 0.88888889 0.94117647
|
|
1. 0.94117647 1. 0.94736842]
|
|
|
|
mean value: 0.948249914000688
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88888889 1. 1. 1. 0.88888889 1.
|
|
1. 0.88888889 1. 0.9 ]
|
|
|
|
mean value: 0.9566666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.88888889 1. 1. 0.77777778 0.88888889 0.88888889
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9444444444444444
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.88888889 1. 1. 0.88888889 0.88888889 0.94444444
|
|
1. 0.94444444 1. 0.9375 ]
|
|
|
|
mean value: 0.9493055555555555
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8 1. 1. 0.77777778 0.8 0.88888889
|
|
1. 0.88888889 1. 0.9 ]
|
|
|
|
mean value: 0.9055555555555556
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03948283 0.04953337 0.03142118 0.03478074 0.03619123 0.03574586
|
|
0.03580761 0.04659438 0.03955555 0.03685379]
|
|
|
|
mean value: 0.03859665393829346
|
|
|
|
key: score_time
|
|
value: [0.02498698 0.02185464 0.01938224 0.02102304 0.0222981 0.0198257
|
|
0.02262115 0.02782845 0.02416492 0.02282 ]
|
|
|
|
mean value: 0.02268052101135254
|
|
|
|
key: test_mcc
|
|
value: [0.77777778 1. 1. 0.79772404 0.77777778 1.
|
|
0.78881064 0.88888889 1. 0.88741197]
|
|
|
|
mean value: 0.8918391084873468
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.97499604 1. 0.98742088 0.98742088
|
|
0.96234177 0.9875 0.98749803 0.98749803]
|
|
|
|
mean value: 0.9874675649417293
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 1. 1. 0.88888889 0.88888889 1.
|
|
0.88235294 0.94117647 1. 0.94117647]
|
|
|
|
mean value: 0.9431372549019608
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.98734177 1. 0.99367089 0.99367089
|
|
0.98113208 0.99371069 0.99371069 0.99371069]
|
|
|
|
mean value: 0.9936947695247194
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 1. 1. 0.875 0.88888889 1.
|
|
0.88888889 0.94117647 1. 0.94736842]
|
|
|
|
mean value: 0.9430211558307534
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.98717949 1. 0.99363057 0.99363057
|
|
0.98113208 0.99371069 0.99363057 0.99363057]
|
|
|
|
mean value: 0.9936544547468715
|
|
|
|
key: test_precision
|
|
value: [0.88888889 1. 1. 1. 0.88888889 1.
|
|
0.8 0.88888889 1. 0.9 ]
|
|
|
|
mean value: 0.9366666666666666
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.98734177 1. 1. 1. ]
|
|
|
|
mean value: 0.9987341772151899
|
|
|
|
key: test_recall
|
|
value: [0.88888889 1. 1. 0.77777778 0.88888889 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9555555555555555
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.97468354 1. 0.98734177 0.98734177
|
|
0.975 0.9875 0.98734177 0.98734177]
|
|
|
|
mean value: 0.9886550632911393
|
|
|
|
key: test_roc_auc
|
|
value: [0.88888889 1. 1. 0.88888889 0.88888889 1.
|
|
0.88888889 0.94444444 1. 0.9375 ]
|
|
|
|
mean value: 0.94375
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.98734177 1. 0.99367089 0.99367089
|
|
0.98117089 0.99375 0.99367089 0.99367089]
|
|
|
|
mean value: 0.9936946202531646
|
|
|
|
key: test_jcc
|
|
value: [0.8 1. 1. 0.77777778 0.8 1.
|
|
0.8 0.88888889 1. 0.9 ]
|
|
|
|
mean value: 0.8966666666666667
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.97468354 1. 0.98734177 0.98734177
|
|
0.96296296 0.9875 0.98734177 0.98734177]
|
|
|
|
mean value: 0.9874513595874356
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03753996 0.0400219 0.04699039 0.04979587 0.02395082 0.02255321
|
|
0.02178669 0.04306293 0.05040836 0.05459094]
|
|
|
|
mean value: 0.03907010555267334
|
|
|
|
key: score_time
|
|
value: [0.02251029 0.01236272 0.02456164 0.02370811 0.01268172 0.01240444
|
|
0.01238751 0.02090907 0.02468371 0.01258755]
|
|
|
|
mean value: 0.017879676818847657
|
|
|
|
key: test_mcc
|
|
value: [0.33333333 0.11396058 0.34188173 0.11111111 0.1490712 0.11396058
|
|
0.09128709 0.04351941 0.60858062 0.18055556]
|
|
|
|
mean value: 0.20872612071548124
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.55555556 0.66666667 0.55555556 0.55555556 0.55555556
|
|
0.52941176 0.52941176 0.76470588 0.58823529]
|
|
|
|
mean value: 0.5967320261437908
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.5 0.7 0.55555556 0.33333333 0.5
|
|
0.6 0.42857143 0.71428571 0.58823529]
|
|
|
|
mean value: 0.5586647992530346
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.57142857 0.63636364 0.55555556 0.66666667 0.57142857
|
|
0.5 0.5 1. 0.625 ]
|
|
|
|
mean value: 0.6293109668109668
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.44444444 0.77777778 0.55555556 0.22222222 0.44444444
|
|
0.75 0.375 0.55555556 0.55555556]
|
|
|
|
mean value: 0.5347222222222222
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.55555556 0.66666667 0.55555556 0.55555556 0.55555556
|
|
0.54166667 0.52083333 0.77777778 0.59027778]
|
|
|
|
mean value: 0.5986111111111111
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.33333333 0.53846154 0.38461538 0.2 0.33333333
|
|
0.42857143 0.27272727 0.55555556 0.41666667]
|
|
|
|
mean value: 0.39632645132645133
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.3016758 0.29095507 0.28988028 0.28957891 0.28800511 0.29090023
|
|
0.29927564 0.30395865 0.2906909 0.29244685]
|
|
|
|
mean value: 0.29373674392700194
|
|
|
|
key: score_time
|
|
value: [0.00947571 0.00942588 0.00925851 0.01021242 0.00925803 0.00914812
|
|
0.00925756 0.00914478 0.00958729 0.00923419]
|
|
|
|
mean value: 0.009400248527526855
|
|
|
|
key: test_mcc
|
|
value: [0.77777778 1. 1. 0.79772404 0.77777778 1.
|
|
0.88888889 0.88888889 1. 0.88741197]
|
|
|
|
mean value: 0.9018469336015741
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 1. 1. 0.88888889 0.88888889 1.
|
|
0.94117647 0.94117647 1. 0.94117647]
|
|
|
|
mean value: 0.9490196078431372
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 1. 1. 0.875 0.88888889 1.
|
|
0.94117647 0.94117647 1. 0.94736842]
|
|
|
|
mean value: 0.948249914000688
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88888889 1. 1. 1. 0.88888889 1.
|
|
0.88888889 0.88888889 1. 0.9 ]
|
|
|
|
mean value: 0.9455555555555555
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.88888889 1. 1. 0.77777778 0.88888889 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9555555555555555
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.88888889 1. 1. 0.88888889 0.88888889 1.
|
|
0.94444444 0.94444444 1. 0.9375 ]
|
|
|
|
mean value: 0.9493055555555555
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8 1. 1. 0.77777778 0.8 1.
|
|
0.88888889 0.88888889 1. 0.9 ]
|
|
|
|
mean value: 0.9055555555555556
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01823831 0.01897502 0.01943731 0.0219481 0.01919198 0.01920938
|
|
0.01920414 0.02677155 0.01945543 0.02004194]
|
|
|
|
mean value: 0.02024731636047363
|
|
|
|
key: score_time
|
|
value: [0.01213932 0.01207805 0.01471949 0.02359366 0.01789927 0.01716661
|
|
0.01457524 0.01231289 0.01462674 0.01511145]
|
|
|
|
mean value: 0.01542227268218994
|
|
|
|
key: test_mcc
|
|
value: [-0.11396058 0.62017367 0.55555556 0. -0.4472136 -0.11111111
|
|
0.07042952 -0.05555556 0.24514517 -0.29166667]
|
|
|
|
mean value: 0.04717964133587751
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.44444444 0.77777778 0.77777778 0.5 0.27777778 0.44444444
|
|
0.52941176 0.47058824 0.58823529 0.35294118]
|
|
|
|
mean value: 0.5163398692810458
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.81818182 0.77777778 0.57142857 0.31578947 0.44444444
|
|
0.55555556 0.47058824 0.46153846 0.35294118]
|
|
|
|
mean value: 0.5268245514375546
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.45454545 0.69230769 0.77777778 0.5 0.3 0.44444444
|
|
0.5 0.44444444 0.75 0.375 ]
|
|
|
|
mean value: 0.5238519813519813
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.55555556 1. 0.77777778 0.66666667 0.33333333 0.44444444
|
|
0.625 0.5 0.33333333 0.33333333]
|
|
|
|
mean value: 0.5569444444444445
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.44444444 0.77777778 0.77777778 0.5 0.27777778 0.44444444
|
|
0.53472222 0.47222222 0.60416667 0.35416667]
|
|
|
|
mean value: 0.5187499999999999
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.69230769 0.63636364 0.4 0.1875 0.28571429
|
|
0.38461538 0.30769231 0.3 0.21428571]
|
|
|
|
mean value: 0.37418123543123544
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03576422 0.02692246 0.01386642 0.01340842 0.01377153 0.04542184
|
|
0.03384018 0.03396988 0.03371286 0.0340662 ]
|
|
|
|
mean value: 0.02847440242767334
|
|
|
|
key: score_time
|
|
value: [0.02211285 0.01175857 0.01203871 0.01246452 0.01206517 0.01190996
|
|
0.02071691 0.02354932 0.0222075 0.02081013]
|
|
|
|
mean value: 0.01696336269378662
|
|
|
|
key: test_mcc
|
|
value: [0.67082039 0.77777778 0.67082039 0.55555556 0.62017367 0.77777778
|
|
0.65277778 0.54935027 1. 0.65277778]
|
|
|
|
mean value: 0.6927831391686119
|
|
|
|
key: train_mcc
|
|
value: [0.94967147 0.94967147 0.93678391 0.94967147 0.9621024 0.9621024
|
|
0.92482989 0.96234177 0.94997636 0.96233582]
|
|
|
|
mean value: 0.9509486971738486
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.88888889 0.83333333 0.77777778 0.77777778 0.88888889
|
|
0.82352941 0.76470588 1. 0.82352941]
|
|
|
|
mean value: 0.8411764705882353
|
|
|
|
key: train_accuracy
|
|
value: [0.97468354 0.97468354 0.96835443 0.97468354 0.98101266 0.98101266
|
|
0.96226415 0.98113208 0.97484277 0.98113208]
|
|
|
|
mean value: 0.9753801448929226
|
|
|
|
key: test_fscore
|
|
value: [0.84210526 0.88888889 0.84210526 0.77777778 0.71428571 0.88888889
|
|
0.82352941 0.77777778 1. 0.82352941]
|
|
|
|
mean value: 0.8378888397464249
|
|
|
|
key: train_fscore
|
|
value: [0.97435897 0.97435897 0.96815287 0.97435897 0.98113208 0.98089172
|
|
0.96202532 0.98113208 0.97435897 0.98089172]
|
|
|
|
mean value: 0.9751661670567474
|
|
|
|
key: test_precision
|
|
value: [0.8 0.88888889 0.8 0.77777778 1. 0.88888889
|
|
0.77777778 0.7 1. 0.875 ]
|
|
|
|
mean value: 0.8508333333333333
|
|
|
|
key: train_precision
|
|
value: [0.98701299 0.98701299 0.97435897 0.98701299 0.975 0.98717949
|
|
0.97435897 0.98734177 0.98701299 0.98717949]
|
|
|
|
mean value: 0.983347064328077
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.88888889 0.88888889 0.77777778 0.55555556 0.88888889
|
|
0.875 0.875 1. 0.77777778]
|
|
|
|
mean value: /home/tanu/git/LSHTM_analysis/scripts/ml/./embb_sl.py:168: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_sl.py:171: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
0.8416666666666667
|
|
|
|
key: train_recall
|
|
value: [0.96202532 0.96202532 0.96202532 0.96202532 0.98734177 0.97468354
|
|
0.95 0.975 0.96202532 0.97468354]
|
|
|
|
mean value: 0.9671835443037975
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.88888889 0.83333333 0.77777778 0.77777778 0.88888889
|
|
0.82638889 0.77083333 1. 0.82638889]
|
|
|
|
mean value: 0.8423611111111111
|
|
|
|
key: train_roc_auc
|
|
value: [0.97468354 0.97468354 0.96835443 0.97468354 0.98101266 0.98101266
|
|
0.96234177 0.98117089 0.97476266 0.98109177]
|
|
|
|
mean value: 0.975379746835443
|
|
|
|
key: test_jcc
|
|
value: [0.72727273 0.8 0.72727273 0.63636364 0.55555556 0.8
|
|
0.7 0.63636364 1. 0.7 ]
|
|
|
|
mean value: 0.7282828282828283
|
|
|
|
key: train_jcc
|
|
value: [0.95 0.95 0.9382716 0.95 0.96296296 0.9625
|
|
0.92682927 0.96296296 0.95 0.9625 ]
|
|
|
|
mean value: 0.951602679915688
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.25937772 0.18409133 0.15225029 0.16805243 0.20423412 0.23713088
|
|
0.21333742 0.21399665 0.22232294 0.31374693]
|
|
|
|
mean value: 0.21685407161712647
|
|
|
|
key: score_time
|
|
value: [0.01209164 0.0203526 0.01194811 0.01314783 0.0207057 0.01997375
|
|
0.02330065 0.01731992 0.02275801 0.02117276]
|
|
|
|
mean value: 0.01827709674835205
|
|
|
|
key: test_mcc
|
|
value: [0.55555556 0.77777778 0.67082039 0.67082039 0.62017367 0.77777778
|
|
0.65277778 0.54935027 1. 0.65277778]
|
|
|
|
mean value: 0.6927831391686119
|
|
|
|
key: train_mcc
|
|
value: [0.98742088 0.94967147 0.93678391 0.9621024 0.9621024 0.9621024
|
|
0.92482989 0.96234177 0.94997636 0.96233582]
|
|
|
|
mean value: 0.9559667312380578
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.88888889 0.83333333 0.83333333 0.77777778 0.88888889
|
|
0.82352941 0.76470588 1. 0.82352941]
|
|
|
|
mean value: 0.8411764705882353
|
|
|
|
key: train_accuracy
|
|
value: [0.99367089 0.97468354 0.96835443 0.98101266 0.98101266 0.98101266
|
|
0.96226415 0.98113208 0.97484277 0.98113208]
|
|
|
|
mean value: 0.9779117904625428
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.88888889 0.84210526 0.84210526 0.71428571 0.88888889
|
|
0.82352941 0.77777778 1. 0.82352941]
|
|
|
|
mean value: 0.8378888397464249
|
|
|
|
key: train_fscore
|
|
value: [0.99363057 0.97435897 0.96815287 0.98089172 0.98113208 0.98089172
|
|
0.96202532 0.98113208 0.97435897 0.98089172]
|
|
|
|
mean value: 0.9777466014843156
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.88888889 0.8 0.8 1. 0.88888889
|
|
0.77777778 0.7 1. 0.875 ]
|
|
|
|
mean value: 0.8508333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 0.98701299 0.97435897 0.98717949 0.975 0.98717949
|
|
0.97435897 0.98734177 0.98701299 0.98717949]
|
|
|
|
mean value: 0.9846624156434283
|
|
|
|
key: test_recall
|
|
value: [0.77777778 0.88888889 0.88888889 0.88888889 0.55555556 0.88888889
|
|
0.875 0.875 1. 0.77777778]
|
|
|
|
mean value: 0.8416666666666667
|
|
|
|
key: train_recall
|
|
value: [0.98734177 0.96202532 0.96202532 0.97468354 0.98734177 0.97468354
|
|
0.95 0.975 0.96202532 0.97468354]
|
|
|
|
mean value: 0.9709810126582279
|
|
|
|
key: test_roc_auc
|
|
value: [0.77777778 0.88888889 0.83333333 0.83333333 0.77777778 0.88888889
|
|
0.82638889 0.77083333 1. 0.82638889]
|
|
|
|
mean value: 0.8423611111111111
|
|
|
|
key: train_roc_auc
|
|
value: [0.99367089 0.97468354 0.96835443 0.98101266 0.98101266 0.98101266
|
|
0.96234177 0.98117089 0.97476266 0.98109177]
|
|
|
|
mean value: 0.9779113924050633
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.8 0.72727273 0.72727273 0.55555556 0.8
|
|
0.7 0.63636364 1. 0.7 ]
|
|
|
|
mean value: 0.7282828282828283
|
|
|
|
key: train_jcc
|
|
value: [0.98734177 0.95 0.9382716 0.9625 0.96296296 0.9625
|
|
0.92682927 0.96296296 0.95 0.9625 ]
|
|
|
|
mean value: 0.9565868571308779
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03828049 0.03973126 0.0394094 0.03932691 0.03923225 0.03911781
|
|
0.03785086 0.03869772 0.03864527 0.03835726]
|
|
|
|
mean value: 0.03886492252349853
|
|
|
|
key: score_time
|
|
value: [0.012254 0.01499844 0.01548243 0.01451349 0.01436687 0.0145216
|
|
0.01563859 0.01568961 0.01582623 0.02349043]
|
|
|
|
mean value: 0.01567816734313965
|
|
|
|
key: test_mcc
|
|
value: [0.90950859 0.78824078 0.94028478 0.96966868 0.87867338 0.88382395
|
|
0.94028478 0.81671746 0.75378788 0.87689394]
|
|
|
|
mean value: 0.8757884239501552
|
|
|
|
key: train_mcc
|
|
value: [0.9389166 0.93858842 0.93219993 0.93239416 0.9456811 0.92888267
|
|
0.9114359 0.91482668 0.92506472 0.94252441]
|
|
|
|
mean value: 0.9310514594809193
|
|
|
|
key: test_accuracy
|
|
value: [0.95454545 0.89393939 0.96923077 0.98461538 0.93846154 0.93846154
|
|
0.96923077 0.90769231 0.87692308 0.93846154]
|
|
|
|
mean value: 0.9371561771561772
|
|
|
|
key: train_accuracy
|
|
value: [0.96928328 0.96928328 0.96592845 0.96592845 0.97274276 0.96422487
|
|
0.95570698 0.95741056 0.96252129 0.97103918]
|
|
|
|
mean value: 0.9654069108267292
|
|
|
|
key: test_fscore
|
|
value: [0.95522388 0.89552239 0.96969697 0.98412698 0.93939394 0.94117647
|
|
0.96875 0.91176471 0.87878788 0.93939394]
|
|
|
|
mean value: 0.9383837156527016
|
|
|
|
key: train_fscore
|
|
value: [0.96969697 0.96938776 0.96644295 0.96655518 0.97306397 0.96482412
|
|
0.95578231 0.95741056 0.96258503 0.97142857]
|
|
|
|
mean value: 0.9657177435980547
|
|
|
|
key: test_precision
|
|
value: [0.94117647 0.88235294 0.94117647 1. 0.91176471 0.88888889
|
|
1. 0.88571429 0.87878788 0.93939394]
|
|
|
|
mean value: 0.9269255581020287
|
|
|
|
key: train_precision
|
|
value: [0.95681063 0.96610169 0.95364238 0.95065789 0.96333333 0.95049505
|
|
0.95254237 0.95578231 0.95932203 0.95695364]
|
|
|
|
mean value: 0.9565641349914513
|
|
|
|
key: test_recall
|
|
value: [0.96969697 0.90909091 1. 0.96875 0.96875 1.
|
|
0.93939394 0.93939394 0.87878788 0.93939394]
|
|
|
|
mean value: 0.9513257575757575
|
|
|
|
key: train_recall
|
|
value: [0.98293515 0.97269625 0.97959184 0.9829932 0.9829932 0.97959184
|
|
0.95904437 0.95904437 0.96587031 0.98634812]
|
|
|
|
mean value: 0.9751108634580112
|
|
|
|
key: test_roc_auc
|
|
value: [0.95454545 0.89393939 0.96969697 0.984375 0.93892045 0.93939394
|
|
0.96969697 0.90719697 0.87689394 0.93844697]
|
|
|
|
mean value: 0.9373106060606061
|
|
|
|
key: train_roc_auc
|
|
value: [0.96928328 0.96928328 0.96590513 0.96589933 0.97272527 0.96419865
|
|
0.95571266 0.95741334 0.96252699 0.97106522]
|
|
|
|
mean value: 0.9654013141092614
|
|
|
|
key: test_jcc
|
|
value: [0.91428571 0.81081081 0.94117647 0.96875 0.88571429 0.88888889
|
|
0.93939394 0.83783784 0.78378378 0.88571429]
|
|
|
|
mean value: 0.8856356017017781
|
|
|
|
key: train_jcc
|
|
value: [0.94117647 0.94059406 0.93506494 0.93527508 0.94754098 0.93203883
|
|
0.91530945 0.91830065 0.92786885 0.94444444]
|
|
|
|
mean value: 0.9337613761275577
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.87459993 1.2076571 0.90833497 1.01458383 1.08987594 1.33507919
|
|
1.01251268 0.96896434 0.90253735 1.01945567]
|
|
|
|
mean value: 1.0333600997924806
|
|
|
|
key: score_time
|
|
value: [0.01497388 0.01613474 0.0149045 0.0150032 0.01484799 0.01342773
|
|
0.01664591 0.0125823 0.01590395 0.01560664]
|
|
|
|
mean value: 0.015003085136413574
|
|
|
|
key: test_mcc
|
|
value: [0.90950859 0.88040627 0.88382395 0.90814394 0.84995597 0.96969697
|
|
1. 0.91144345 0.94017476 0.90814394]
|
|
|
|
mean value: 0.9161297844265968
|
|
|
|
key: train_mcc
|
|
value: [0.96928892 0.95906671 0.99320865 0.97615537 0.97276495 0.95571215
|
|
0.96934132 0.95232236 1. 0.97957999]
|
|
|
|
mean value: 0.9727440421839888
|
|
|
|
key: test_accuracy
|
|
value: [0.95454545 0.93939394 0.93846154 0.95384615 0.92307692 0.98461538
|
|
1. 0.95384615 0.96923077 0.95384615]
|
|
|
|
mean value: 0.9570862470862471
|
|
|
|
key: train_accuracy
|
|
value: [0.98464164 0.97952218 0.99659284 0.98807496 0.98637138 0.97785349
|
|
0.9846678 0.97614991 1. 0.98977853]
|
|
|
|
mean value: 0.9863652749271764
|
|
|
|
key: test_fscore
|
|
value: [0.95522388 0.94117647 0.94117647 0.95384615 0.92537313 0.98461538
|
|
1. 0.95652174 0.97058824 0.95384615]
|
|
|
|
mean value: 0.9582367622834089
|
|
|
|
key: train_fscore
|
|
value: [0.98461538 0.97959184 0.99661017 0.98811545 0.98644068 0.97792869
|
|
0.9846678 0.97619048 1. 0.98979592]
|
|
|
|
mean value: 0.9863956408365138
|
|
|
|
key: test_precision
|
|
value: [0.94117647 0.91428571 0.88888889 0.93939394 0.88571429 0.96969697
|
|
1. 0.91666667 0.94285714 0.96875 ]
|
|
|
|
mean value: 0.9367430078091843
|
|
|
|
key: train_precision
|
|
value: [0.98630137 0.97627119 0.99324324 0.98644068 0.98310811 0.97627119
|
|
0.9829932 0.97288136 1. 0.98644068]
|
|
|
|
mean value: 0.984395100323904
|
|
|
|
key: test_recall
|
|
value: [0.96969697 0.96969697 1. 0.96875 0.96875 1.
|
|
1. 1. 1. 0.93939394]
|
|
|
|
mean value: 0.9816287878787879
|
|
|
|
key: train_recall
|
|
value: [0.98293515 0.98293515 1. 0.98979592 0.98979592 0.97959184
|
|
0.98634812 0.97952218 1. 0.99317406]
|
|
|
|
mean value: 0.9884098349237306
|
|
|
|
key: test_roc_auc
|
|
value: [0.95454545 0.93939394 0.93939394 0.95407197 0.92376894 0.98484848
|
|
1. 0.953125 0.96875 0.95407197]
|
|
|
|
mean value: 0.9571969696969698
|
|
|
|
key: train_roc_auc
|
|
value: [0.98464164 0.97952218 0.99658703 0.98807202 0.98636554 0.97785053
|
|
0.98467066 0.97615565 1. 0.98978431]
|
|
|
|
mean value: 0.9863649555385294
|
|
|
|
key: test_jcc
|
|
value: [0.91428571 0.88888889 0.88888889 0.91176471 0.86111111 0.96969697
|
|
1. 0.91666667 0.94285714 0.91176471]
|
|
|
|
mean value: 0.9205924794160089
|
|
|
|
key: train_jcc
|
|
value: [0.96969697 0.96 0.99324324 0.97651007 0.97324415 0.95681063
|
|
0.96979866 0.95348837 1. 0.97979798]
|
|
|
|
mean value: 0.9732590068049858
|
|
|
|
MCC on Blind test: 0.85
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0157187 0.01268959 0.01264095 0.01270771 0.01175928 0.01104641
|
|
0.01074839 0.01086068 0.01050949 0.01081109]
|
|
|
|
mean value: 0.01194922924041748
|
|
|
|
key: score_time
|
|
value: [0.01302481 0.01075292 0.01039052 0.00946784 0.00998569 0.00904059
|
|
0.00938535 0.009027 0.00891423 0.00903201]
|
|
|
|
mean value: 0.009902095794677735
|
|
|
|
key: test_mcc
|
|
value: [0.78824078 0.54772256 0.48376972 0.84644588 0.72322307 0.66161167
|
|
0.69383917 0.60037879 0.53838887 0.66193182]
|
|
|
|
mean value: 0.6545552328074842
|
|
|
|
key: train_mcc
|
|
value: [0.64506622 0.71337706 0.672924 0.69348941 0.6624506 0.68322362
|
|
0.69015689 0.70358246 0.66615196 0.6906491 ]
|
|
|
|
mean value: 0.6821071327367672
|
|
|
|
key: test_accuracy
|
|
value: [0.89393939 0.77272727 0.73846154 0.92307692 0.86153846 0.83076923
|
|
0.84615385 0.8 0.76923077 0.83076923]
|
|
|
|
mean value: 0.8266666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.8225256 0.85665529 0.83645656 0.84667802 0.82793867 0.84156729
|
|
0.84497445 0.85178876 0.8330494 0.84497445]
|
|
|
|
mean value: 0.840660848532772
|
|
|
|
key: test_fscore
|
|
value: [0.89230769 0.7826087 0.75362319 0.92063492 0.85714286 0.82539683
|
|
0.84375 0.8 0.7761194 0.83076923]
|
|
|
|
mean value: 0.8282352813294571
|
|
|
|
key: train_fscore
|
|
value: [0.82312925 0.85762712 0.83728814 0.84848485 0.81535649 0.84317032
|
|
0.846543 0.85178876 0.83161512 0.84808013]
|
|
|
|
mean value: 0.8403083176678291
|
|
|
|
key: test_precision
|
|
value: [0.90625 0.75 0.7027027 0.93548387 0.87096774 0.83870968
|
|
0.87096774 0.8125 0.76470588 0.84375 ]
|
|
|
|
mean value: 0.8296037617313708
|
|
|
|
key: train_precision
|
|
value: [0.82033898 0.85185185 0.83445946 0.84 0.88142292 0.8361204
|
|
0.83666667 0.85034014 0.83737024 0.83006536]
|
|
|
|
mean value: 0.8418636025013883
|
|
|
|
key: test_recall
|
|
value: [0.87878788 0.81818182 0.8125 0.90625 0.84375 0.8125
|
|
0.81818182 0.78787879 0.78787879 0.81818182]
|
|
|
|
mean value: 0.8284090909090909
|
|
|
|
key: train_recall
|
|
value: [0.82593857 0.86348123 0.84013605 0.85714286 0.7585034 0.85034014
|
|
0.85665529 0.85324232 0.82593857 0.8668942 ]
|
|
|
|
mean value: 0.8398272619628055
|
|
|
|
key: test_roc_auc
|
|
value: [0.89393939 0.77272727 0.73958333 0.92282197 0.86126894 0.83049242
|
|
0.84659091 0.80018939 0.76893939 0.83096591]
|
|
|
|
mean value: 0.826751893939394
|
|
|
|
key: train_roc_auc
|
|
value: [0.8225256 0.85665529 0.83645028 0.84666017 0.82805716 0.84155232
|
|
0.84499431 0.85179123 0.83303731 0.84501172]
|
|
|
|
mean value: 0.8406735390401894
|
|
|
|
key: test_jcc
|
|
value: [0.80555556 0.64285714 0.60465116 0.85294118 0.75 0.7027027
|
|
0.72972973 0.66666667 0.63414634 0.71052632]
|
|
|
|
mean value: 0.7099776794025972
|
|
|
|
key: train_jcc
|
|
value: [0.69942197 0.75074184 0.72011662 0.73684211 0.6882716 0.72886297
|
|
0.73391813 0.74183976 0.71176471 0.73623188]
|
|
|
|
mean value: 0.7248011588325265
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0107584 0.01122403 0.01064491 0.01139951 0.01227427 0.01099348
|
|
0.01089954 0.01204586 0.01091719 0.01089287]
|
|
|
|
mean value: 0.011205005645751952
|
|
|
|
key: score_time
|
|
value: [0.00904584 0.00896311 0.00916839 0.00917315 0.00924873 0.00929451
|
|
0.00943422 0.00923228 0.00928068 0.00969172]
|
|
|
|
mean value: 0.009253263473510742
|
|
|
|
key: test_mcc
|
|
value: [0.54772256 0.45454545 0.72572613 0.70352647 0.60000027 0.41516606
|
|
0.42714107 0.41785545 0.38461695 0.73234704]
|
|
|
|
mean value: 0.5408647457818198
|
|
|
|
key: train_mcc
|
|
value: [0.54699656 0.59399689 0.57120468 0.58602719 0.58602719 0.56692716
|
|
0.58164247 0.59178072 0.61296324 0.60857864]
|
|
|
|
mean value: 0.5846144748987409
|
|
|
|
key: test_accuracy
|
|
value: [0.77272727 0.72727273 0.86153846 0.84615385 0.8 0.70769231
|
|
0.70769231 0.70769231 0.69230769 0.86153846]
|
|
|
|
mean value: 0.7684615384615385
|
|
|
|
key: train_accuracy
|
|
value: [0.77303754 0.79522184 0.78534923 0.79216354 0.79216354 0.78194208
|
|
0.79045997 0.7955707 0.80579216 0.80408859]
|
|
|
|
mean value: 0.7915789198272002
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.72727273 0.85245902 0.82758621 0.79365079 0.6984127
|
|
0.6779661 0.6984127 0.70588235 0.85245902]
|
|
|
|
mean value: 0.7596006373973208
|
|
|
|
key: train_fscore
|
|
value: [0.76625659 0.7833935 0.78125 0.7844523 0.7844523 0.77060932
|
|
0.78458844 0.79020979 0.79858657 0.8 ]
|
|
|
|
mean value: 0.7843798808929663
|
|
|
|
key: test_precision
|
|
value: [0.8 0.72727273 0.89655172 0.92307692 0.80645161 0.70967742
|
|
0.76923077 0.73333333 0.68571429 0.92857143]
|
|
|
|
mean value: 0.7979880223595462
|
|
|
|
key: train_precision
|
|
value: [0.78985507 0.83141762 0.79787234 0.81617647 0.81617647 0.81439394
|
|
0.8057554 0.81003584 0.82783883 0.81560284]
|
|
|
|
mean value: 0.8125124820676404
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.72727273 0.8125 0.75 0.78125 0.6875
|
|
0.60606061 0.66666667 0.72727273 0.78787879]
|
|
|
|
mean value: 0.7273674242424243
|
|
|
|
key: train_recall
|
|
value: [0.7440273 0.74061433 0.76530612 0.75510204 0.75510204 0.73129252
|
|
0.76450512 0.77133106 0.77133106 0.78498294]
|
|
|
|
mean value: 0.7583594529962155
|
|
|
|
key: test_roc_auc
|
|
value: [0.77272727 0.72727273 0.86079545 0.84469697 0.79971591 0.70738636
|
|
0.7092803 0.70833333 0.69176136 0.86268939]
|
|
|
|
mean value: 0.7684659090909091
|
|
|
|
key: train_roc_auc
|
|
value: [0.77303754 0.79522184 0.78538344 0.79222679 0.79222679 0.78202851
|
|
0.79041583 0.79552947 0.80573356 0.80405609]
|
|
|
|
mean value: 0.7915859859302082
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.57142857 0.74285714 0.70588235 0.65789474 0.53658537
|
|
0.51282051 0.53658537 0.54545455 0.74285714]
|
|
|
|
mean value: 0.616775035229313
|
|
|
|
key: train_jcc
|
|
value: [0.62108262 0.64391691 0.64102564 0.64534884 0.64534884 0.62682216
|
|
0.64553314 0.65317919 0.66470588 0.66666667]
|
|
|
|
mean value: 0.6453629888889284
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01025414 0.01117158 0.0117538 0.01153493 0.00975585 0.0098505
|
|
0.01094294 0.01119208 0.01107335 0.01145792]
|
|
|
|
mean value: 0.010898709297180176
|
|
|
|
key: score_time
|
|
value: [0.0131917 0.0166955 0.01715803 0.01844025 0.01854348 0.01284981
|
|
0.01602077 0.0126543 0.01319885 0.01370668]
|
|
|
|
mean value: 0.015245938301086425
|
|
|
|
key: test_mcc
|
|
value: [0.48507125 0.39686563 0.74121539 0.66193182 0.66435774 0.61558566
|
|
0.50807424 0.54376443 0.57789674 0.66382036]
|
|
|
|
mean value: 0.58585832571338
|
|
|
|
key: train_mcc
|
|
value: [0.76139478 0.77064885 0.7650564 0.7766583 0.79148596 0.79647229
|
|
0.7797719 0.77411517 0.7678611 0.7935986 ]
|
|
|
|
mean value: 0.7777063364453793
|
|
|
|
key: test_accuracy
|
|
value: [0.74242424 0.6969697 0.86153846 0.83076923 0.81538462 0.8
|
|
0.75384615 0.76923077 0.78461538 0.83076923]
|
|
|
|
mean value: 0.7885547785547786
|
|
|
|
key: train_accuracy
|
|
value: [0.87713311 0.88054608 0.87393526 0.88245315 0.88926746 0.89267462
|
|
0.88415673 0.88245315 0.87734242 0.89097104]
|
|
|
|
mean value: 0.8830933013936776
|
|
|
|
key: test_fscore
|
|
value: [0.73846154 0.71428571 0.87323944 0.83076923 0.83783784 0.81690141
|
|
0.76470588 0.78873239 0.80555556 0.84057971]
|
|
|
|
mean value: 0.8011068708844366
|
|
|
|
key: train_fscore
|
|
value: [0.88498403 0.88924051 0.88615385 0.89201878 0.89859594 0.9010989
|
|
0.89308176 0.89064976 0.8875 0.89937107]
|
|
|
|
mean value: 0.8922694594792214
|
|
|
|
key: test_precision
|
|
value: [0.75 0.67567568 0.79487179 0.81818182 0.73809524 0.74358974
|
|
0.74285714 0.73684211 0.74358974 0.80555556]
|
|
|
|
mean value: 0.754925881767987
|
|
|
|
key: train_precision
|
|
value: [0.83183183 0.82890855 0.80898876 0.82608696 0.82997118 0.83673469
|
|
0.82798834 0.83136095 0.8184438 0.83381924]
|
|
|
|
mean value: 0.8274134313359605
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.75757576 0.96875 0.84375 0.96875 0.90625
|
|
0.78787879 0.84848485 0.87878788 0.87878788]
|
|
|
|
mean value: 0.8566287878787879
|
|
|
|
key: train_recall
|
|
value: [0.94539249 0.95904437 0.97959184 0.96938776 0.97959184 0.97619048
|
|
0.96928328 0.95904437 0.96928328 0.97610922]
|
|
|
|
mean value: 0.9682918901348935
|
|
|
|
key: test_roc_auc
|
|
value: [0.74242424 0.6969697 0.86316288 0.83096591 0.81770833 0.80160985
|
|
0.75331439 0.76799242 0.78314394 0.83001894]
|
|
|
|
mean value: 0.7887310606060606
|
|
|
|
key: train_roc_auc
|
|
value: [0.87713311 0.88054608 0.87375496 0.8823048 0.88911332 0.8925321
|
|
0.8843015 0.88258341 0.87749878 0.89111583]
|
|
|
|
mean value: 0.8830883889391934
|
|
|
|
key: test_jcc
|
|
value: [0.58536585 0.55555556 0.775 0.71052632 0.72093023 0.69047619
|
|
0.61904762 0.65116279 0.6744186 0.725 ]
|
|
|
|
mean value: 0.6707483162434352
|
|
|
|
key: train_jcc
|
|
value: [0.79369628 0.8005698 0.79558011 0.80508475 0.81586402 0.82
|
|
0.80681818 0.80285714 0.79775281 0.81714286]
|
|
|
|
mean value: 0.8055365945371218
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03306079 0.03242803 0.03047276 0.03302431 0.03214908 0.03250813
|
|
0.03191948 0.0324676 0.03009939 0.03266764]
|
|
|
|
mean value: 0.03207972049713135
|
|
|
|
key: score_time
|
|
value: [0.01489544 0.01417732 0.01466584 0.01532269 0.01460743 0.01509666
|
|
0.01482368 0.01405001 0.01482248 0.01481605]
|
|
|
|
mean value: 0.01472775936126709
|
|
|
|
key: test_mcc
|
|
value: [0.84887469 0.73029674 0.76761091 0.91144345 0.66477003 0.72348485
|
|
0.72348485 0.60037879 0.63153153 0.78483448]
|
|
|
|
mean value: 0.7386710313079483
|
|
|
|
key: train_mcc
|
|
value: [0.80206647 0.8191174 0.83662524 0.83305279 0.85016022 0.82624039
|
|
0.83986719 0.82971486 0.80583708 0.85740514]
|
|
|
|
mean value: 0.8300086779731306
|
|
|
|
key: test_accuracy
|
|
value: [0.92424242 0.86363636 0.87692308 0.95384615 0.83076923 0.86153846
|
|
0.86153846 0.8 0.81538462 0.89230769]
|
|
|
|
mean value: 0.868018648018648
|
|
|
|
key: train_accuracy
|
|
value: [0.90102389 0.90955631 0.91822828 0.9165247 0.92504259 0.91311755
|
|
0.91993186 0.91482112 0.90289608 0.92844974]
|
|
|
|
mean value: 0.9149592129820747
|
|
|
|
key: test_fscore
|
|
value: [0.92537313 0.86956522 0.88571429 0.95081967 0.8358209 0.86153846
|
|
0.86153846 0.8 0.82352941 0.89552239]
|
|
|
|
mean value: 0.8709421927988814
|
|
|
|
key: train_fscore
|
|
value: [0.90136054 0.90940171 0.91919192 0.91680815 0.92567568 0.91311755
|
|
0.91965812 0.91408935 0.90322581 0.9295302 ]
|
|
|
|
mean value: 0.9152059019272198
|
|
|
|
key: test_precision
|
|
value: [0.91176471 0.83333333 0.81578947 1. 0.8 0.84848485
|
|
0.875 0.8125 0.8 0.88235294]
|
|
|
|
mean value: 0.8579225302561216
|
|
|
|
key: train_precision
|
|
value: [0.89830508 0.9109589 0.91 0.91525424 0.91946309 0.91467577
|
|
0.92123288 0.92041522 0.89864865 0.91419142]
|
|
|
|
mean value: 0.9123145250726284
|
|
|
|
key: test_recall
|
|
value: [0.93939394 0.90909091 0.96875 0.90625 0.875 0.875
|
|
0.84848485 0.78787879 0.84848485 0.90909091]
|
|
|
|
mean value: 0.8867424242424242
|
|
|
|
key: train_recall
|
|
value: [0.90443686 0.90784983 0.92857143 0.91836735 0.93197279 0.91156463
|
|
0.91808874 0.90784983 0.90784983 0.94539249]
|
|
|
|
mean value: 0.9181943767267999
|
|
|
|
key: test_roc_auc
|
|
value: [0.92424242 0.86363636 0.87831439 0.953125 0.83143939 0.86174242
|
|
0.86174242 0.80018939 0.81486742 0.89204545]
|
|
|
|
mean value: 0.8681344696969697
|
|
|
|
key: train_roc_auc
|
|
value: [0.90102389 0.90955631 0.91821063 0.91652156 0.92503076 0.9131202
|
|
0.91992872 0.91480927 0.90290451 0.92847856]
|
|
|
|
mean value: 0.9149584407141697
|
|
|
|
key: test_jcc
|
|
value: [0.86111111 0.76923077 0.79487179 0.90625 0.71794872 0.75675676
|
|
0.75675676 0.66666667 0.7 0.81081081]
|
|
|
|
mean value: 0.7740403384153385
|
|
|
|
key: train_jcc
|
|
value: [0.82043344 0.8338558 0.85046729 0.84639498 0.86163522 0.84012539
|
|
0.85126582 0.84177215 0.82352941 0.86833856]
|
|
|
|
mean value: 0.843781806636849
|
|
|
|
MCC on Blind test: 0.85
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.36617112 2.41984844 2.40280962 2.34540963 2.35769773 2.4068315
|
|
2.38700414 2.36304808 2.38966513 2.34983063]
|
|
|
|
mean value: 2.3788316011428834
|
|
|
|
key: score_time
|
|
value: [0.01269841 0.01472521 0.01361251 0.01399899 0.01547694 0.01341414
|
|
0.01428437 0.01328421 0.01366544 0.02118802]
|
|
|
|
mean value: 0.014634823799133301
|
|
|
|
key: test_mcc
|
|
value: [0.91287093 0.91287093 0.94028478 1. 0.88382395 0.91168461
|
|
1. 0.85599665 0.88340557 0.94017476]
|
|
|
|
mean value: 0.9241112183583176
|
|
|
|
key: train_mcc
|
|
value: [0.99659284 0.99659284 0.9965986 0.9965986 0.9965986 0.9965986
|
|
0.99659864 0.99659864 1. 0.99659864]
|
|
|
|
mean value: 0.996937598865393
|
|
|
|
key: test_accuracy
|
|
value: [0.95454545 0.95454545 0.96923077 1. 0.93846154 0.95384615
|
|
1. 0.92307692 0.93846154 0.96923077]
|
|
|
|
mean value: 0.9601398601398602
|
|
|
|
key: train_accuracy
|
|
value: [0.99829352 0.99829352 0.99829642 0.99829642 0.99829642 0.99829642
|
|
0.99829642 0.99829642 1. 0.99829642]
|
|
|
|
mean value: 0.9984661988127286
|
|
|
|
key: test_fscore
|
|
value: [0.95652174 0.95652174 0.96969697 1. 0.94117647 0.95522388
|
|
1. 0.92957746 0.94285714 0.97058824]
|
|
|
|
mean value: 0.9622163642083083
|
|
|
|
key: train_fscore
|
|
value: [0.99829642 0.99829642 0.99830221 0.99830221 0.99830221 0.99830221
|
|
0.99829642 0.99829642 1. 0.99829642]
|
|
|
|
mean value: 0.9984690940959036
|
|
|
|
key: test_precision
|
|
value: [0.91666667 0.91666667 0.94117647 1. 0.88888889 0.91428571
|
|
1. 0.86842105 0.89189189 0.94285714]
|
|
|
|
mean value: 0.9280854494476786
|
|
|
|
key: train_precision
|
|
value: [0.99659864 0.99659864 0.99661017 0.99661017 0.99661017 0.99661017
|
|
0.99659864 0.99659864 1. 0.99659864]
|
|
|
|
mean value: 0.9969433875245013
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95454545 0.95454545 0.96969697 1. 0.93939394 0.95454545
|
|
1. 0.921875 0.9375 0.96875 ]
|
|
|
|
mean value: 0.9600852272727273
|
|
|
|
key: train_roc_auc
|
|
value: [0.99829352 0.99829352 0.99829352 0.99829352 0.99829352 0.99829352
|
|
0.99829932 0.99829932 1. 0.99829932]
|
|
|
|
mean value: 0.9984659051333844
|
|
|
|
key: test_jcc
|
|
value: [0.91666667 0.91666667 0.94117647 1. 0.88888889 0.91428571
|
|
1. 0.86842105 0.89189189 0.94285714]
|
|
|
|
mean value: 0.9280854494476786
|
|
|
|
key: train_jcc
|
|
value: [0.99659864 0.99659864 0.99661017 0.99661017 0.99661017 0.99661017
|
|
0.99659864 0.99659864 1. 0.99659864]
|
|
|
|
mean value: 0.9969433875245013
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03670931 0.02427554 0.02596927 0.0255847 0.02775073 0.02558708
|
|
0.02727795 0.0245955 0.02489996 0.02757144]
|
|
|
|
mean value: 0.027022147178649904
|
|
|
|
key: score_time
|
|
value: [0.01371646 0.00959873 0.00935125 0.00904441 0.00926328 0.00973654
|
|
0.00950623 0.00951982 0.00918865 0.00926661]
|
|
|
|
mean value: 0.00981919765472412
|
|
|
|
key: test_mcc
|
|
value: [0.94112395 0.9701425 0.96969697 0.96969697 0.96969697 0.94028478
|
|
1. 0.96966868 0.91144345 0.94017476]
|
|
|
|
mean value: 0.9581929030038463
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.98484848 0.98461538 0.98461538 0.98461538 0.96923077
|
|
1. 0.98461538 0.95384615 0.96923077]
|
|
|
|
mean value: 0.9785314685314686
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97058824 0.98507463 0.98461538 0.98461538 0.98461538 0.96969697
|
|
1. 0.98507463 0.95652174 0.97058824]
|
|
|
|
mean value: 0.9791390586993137
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94285714 0.97058824 0.96969697 0.96969697 0.96969697 0.94117647
|
|
1. 0.97058824 0.91666667 0.94285714]
|
|
|
|
mean value: 0.9593824802648332
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96969697 0.98484848 0.98484848 0.98484848 0.98484848 0.96969697
|
|
1. 0.984375 0.953125 0.96875 ]
|
|
|
|
mean value: 0.9785037878787879
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94285714 0.97058824 0.96969697 0.96969697 0.96969697 0.94117647
|
|
1. 0.97058824 0.91666667 0.94285714]
|
|
|
|
mean value: 0.9593824802648332
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.12954378 0.128371 0.13171649 0.13734126 0.13280702 0.13321447
|
|
0.12788749 0.12463093 0.12696791 0.12705946]
|
|
|
|
mean value: 0.12995398044586182
|
|
|
|
key: score_time
|
|
value: [0.01783633 0.01957083 0.01873755 0.02003932 0.01885629 0.01862621
|
|
0.01799321 0.01798415 0.01792145 0.01805067]
|
|
|
|
mean value: 0.018561601638793945
|
|
|
|
key: test_mcc
|
|
value: [0.94112395 1. 0.94028478 1. 0.90814394 0.96969697
|
|
0.96966868 0.96966868 0.91144345 1. ]
|
|
|
|
mean value: 0.9610030456147299
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 1. 0.96923077 1. 0.95384615 0.98461538
|
|
0.98461538 0.98461538 0.95384615 1. ]
|
|
|
|
mean value: 0.98004662004662
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97058824 1. 0.96969697 1. 0.95384615 0.98461538
|
|
0.98507463 0.98507463 0.95652174 1. ]
|
|
|
|
mean value: 0.9805417736314403
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94285714 1. 0.94117647 1. 0.93939394 0.96969697
|
|
0.97058824 0.97058824 0.91666667 1. ]
|
|
|
|
mean value: 0.965096765979119
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.96875 1. 1. 1. 1.
|
|
1. ]
|
|
|
|
mean value: 0.996875
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96969697 1. 0.96969697 1. 0.95407197 0.98484848
|
|
0.984375 0.984375 0.953125 1. ]
|
|
|
|
mean value: 0.9800189393939394
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94285714 1. 0.94117647 1. 0.91176471 0.96969697
|
|
0.97058824 0.97058824 0.91666667 1. ]
|
|
|
|
mean value: 0.9623338426279603
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01057839 0.01049995 0.01057529 0.01068735 0.01063061 0.01047444
|
|
0.01177788 0.01060271 0.0109849 0.01069641]
|
|
|
|
mean value: 0.010750794410705566
|
|
|
|
key: score_time
|
|
value: [0.00883055 0.00891185 0.00885177 0.00892806 0.00886106 0.00881624
|
|
0.00884795 0.00892973 0.00899482 0.0088551 ]
|
|
|
|
mean value: 0.008882713317871094
|
|
|
|
key: test_mcc
|
|
value: [0.80622577 0.78086881 0.68030134 0.91168461 0.68964536 0.88382395
|
|
0.80282704 0.88340557 0.75148662 0.91144345]
|
|
|
|
mean value: 0.8101712532464146
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.89393939 0.87878788 0.81538462 0.95384615 0.83076923 0.93846154
|
|
0.89230769 0.93846154 0.86153846 0.95384615]
|
|
|
|
mean value: 0.8957342657342657
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90410959 0.89189189 0.84210526 0.95522388 0.84931507 0.94117647
|
|
0.90410959 0.94285714 0.88 0.95652174]
|
|
|
|
mean value: 0.9067310634797957
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.825 0.80487805 0.72727273 0.91428571 0.75609756 0.88888889
|
|
0.825 0.89189189 0.78571429 0.91666667]
|
|
|
|
mean value: 0.8335695784476272
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.96875 1. 1. 1. 1.
|
|
1. ]
|
|
|
|
mean value: 0.996875
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.89393939 0.87878788 0.81818182 0.95454545 0.83285985 0.93939394
|
|
0.890625 0.9375 0.859375 0.953125 ]
|
|
|
|
mean value: 0.8958333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.825 0.80487805 0.72727273 0.91428571 0.73809524 0.88888889
|
|
0.825 0.89189189 0.78571429 0.91666667]
|
|
|
|
mean value: 0.8317693461595901
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.83020997 1.8339448 1.83662057 1.85287166 1.85693479 1.83377242
|
|
1.83173656 1.83148408 1.90730047 1.97430134]
|
|
|
|
mean value: 1.8589176654815673
|
|
|
|
key: score_time
|
|
value: [0.09384489 0.09449744 0.09404278 0.09543896 0.09338045 0.09436655
|
|
0.09370232 0.09369016 0.10243464 0.10232711]
|
|
|
|
mean value: 0.09577252864837646
|
|
|
|
key: test_mcc
|
|
value: [0.94112395 0.9701425 0.96969697 1. 1. 0.96969697
|
|
0.96966868 0.96966868 0.91144345 0.96966868]
|
|
|
|
mean value: 0.9671109886051606
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.98484848 0.98461538 1. 1. 0.98461538
|
|
0.98461538 0.98461538 0.95384615 0.98461538]
|
|
|
|
mean value: 0.9831468531468532
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97058824 0.98507463 0.98461538 1. 1. 0.98461538
|
|
0.98507463 0.98507463 0.95652174 0.98507463]
|
|
|
|
mean value: 0.9836639251118008
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94285714 0.97058824 0.96969697 1. 1. 0.96969697
|
|
0.97058824 0.97058824 0.91666667 0.97058824]
|
|
|
|
mean value: 0.9681270690094219
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96969697 0.98484848 0.98484848 1. 1. 0.98484848
|
|
0.984375 0.984375 0.953125 0.984375 ]
|
|
|
|
mean value: 0.9830492424242424
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94285714 0.97058824 0.96969697 1. 1. 0.96969697
|
|
0.97058824 0.97058824 0.91666667 0.97058824]
|
|
|
|
mean value: 0.9681270690094219
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.82
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.96820188 1.01171207 1.01190472 1.01802468 1.00538397 0.99444675
|
|
1.08758998 1.04639006 1.02571559 1.07481289]
|
|
|
|
mean value: 1.024418258666992
|
|
|
|
key: score_time
|
|
value: [0.24577761 0.13931179 0.27650094 0.25720572 0.26955438 0.25722146
|
|
0.28553748 0.26408482 0.23240495 0.24349713]
|
|
|
|
mean value: 0.24710962772369385
|
|
|
|
key: test_mcc
|
|
value: [0.94112395 0.9701425 0.96969697 0.96969697 0.96969697 0.96969697
|
|
1. 0.96966868 0.84644588 0.87867338]
|
|
|
|
mean value: 0.948484226747499
|
|
|
|
key: train_mcc
|
|
value: [0.9830783 0.9763879 0.97976106 0.97642665 0.97642665 0.98310636
|
|
0.97642854 0.97976246 0.98301582 0.96601886]
|
|
|
|
mean value: 0.9780412605604475
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.98484848 0.98461538 0.98461538 0.98461538 0.98461538
|
|
1. 0.98461538 0.92307692 0.93846154]
|
|
|
|
mean value: 0.973916083916084
|
|
|
|
key: train_accuracy
|
|
value: [0.99146758 0.98805461 0.98977853 0.98807496 0.98807496 0.99148211
|
|
0.98807496 0.98977853 0.99148211 0.98296422]
|
|
|
|
mean value: 0.9889232576123169
|
|
|
|
key: test_fscore
|
|
value: [0.97058824 0.98507463 0.98461538 0.98461538 0.98461538 0.98461538
|
|
1. 0.98507463 0.92537313 0.9375 ]
|
|
|
|
mean value: 0.9742072161815358
|
|
|
|
key: train_fscore
|
|
value: [0.99153976 0.98819562 0.98989899 0.98823529 0.98823529 0.9915683
|
|
0.98819562 0.98986486 0.99151104 0.98305085]
|
|
|
|
mean value: 0.9890295617048415
|
|
|
|
key: test_precision
|
|
value: [0.94285714 0.97058824 0.96969697 0.96969697 0.96969697 0.96969697
|
|
1. 0.97058824 0.91176471 0.96774194]
|
|
|
|
mean value: 0.964232813359948
|
|
|
|
key: train_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.98322148 0.97666667 0.98 0.97674419 0.97674419 0.98327759
|
|
0.97666667 0.97993311 0.98648649 0.97643098]
|
|
|
|
mean value: 0.9796171347195024
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 0.93939394 0.90909091]
|
|
|
|
mean value: 0.9848484848484849
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 0.99658703 0.98976109]
|
|
|
|
mean value: 0.9986348122866894
|
|
|
|
key: test_roc_auc
|
|
value: [0.96969697 0.98484848 0.98484848 0.98484848 0.98484848 0.98484848
|
|
1. 0.984375 0.92282197 0.93892045]
|
|
|
|
mean value: 0.9740056818181818
|
|
|
|
key: train_roc_auc
|
|
value: [0.99146758 0.98805461 0.98976109 0.98805461 0.98805461 0.99146758
|
|
0.98809524 0.98979592 0.99149079 0.98297578]
|
|
|
|
mean value: 0.988921780316222
|
|
|
|
key: test_jcc
|
|
value: [0.94285714 0.97058824 0.96969697 0.96969697 0.96969697 0.96969697
|
|
1. 0.97058824 0.86111111 0.88235294]
|
|
|
|
mean value: 0.9506285544520838
|
|
|
|
key: train_jcc
|
|
value: [0.98322148 0.97666667 0.98 0.97674419 0.97674419 0.98327759
|
|
0.97666667 0.97993311 0.98316498 0.96666667]
|
|
|
|
mean value: 0.978308553410921
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02450657 0.01054621 0.0114634 0.01047778 0.01044083 0.01177239
|
|
0.01055622 0.01051903 0.01049495 0.01219797]
|
|
|
|
mean value: 0.012297534942626953
|
|
|
|
key: score_time
|
|
value: [0.00973797 0.00886369 0.00927877 0.00881004 0.00888014 0.0099175
|
|
0.00885415 0.00883937 0.00888014 0.0097239 ]
|
|
|
|
mean value: 0.009178566932678222
|
|
|
|
key: test_mcc
|
|
value: [0.54772256 0.45454545 0.72572613 0.70352647 0.60000027 0.41516606
|
|
0.42714107 0.41785545 0.38461695 0.73234704]
|
|
|
|
mean value: 0.5408647457818198
|
|
|
|
key: train_mcc
|
|
value: [0.54699656 0.59399689 0.57120468 0.58602719 0.58602719 0.56692716
|
|
0.58164247 0.59178072 0.61296324 0.60857864]
|
|
|
|
mean value: 0.5846144748987409
|
|
|
|
key: test_accuracy
|
|
value: [0.77272727 0.72727273 0.86153846 0.84615385 0.8 0.70769231
|
|
0.70769231 0.70769231 0.69230769 0.86153846]
|
|
|
|
mean value: 0.7684615384615385
|
|
|
|
key: train_accuracy
|
|
value: [0.77303754 0.79522184 0.78534923 0.79216354 0.79216354 0.78194208
|
|
0.79045997 0.7955707 0.80579216 0.80408859]
|
|
|
|
mean value: 0.7915789198272002
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.72727273 0.85245902 0.82758621 0.79365079 0.6984127
|
|
0.6779661 0.6984127 0.70588235 0.85245902]
|
|
|
|
mean value: 0.7596006373973208
|
|
|
|
key: train_fscore
|
|
value: [0.76625659 0.7833935 0.78125 0.7844523 0.7844523 0.77060932
|
|
0.78458844 0.79020979 0.79858657 0.8 ]
|
|
|
|
mean value: 0.7843798808929663
|
|
|
|
key: test_precision
|
|
value: [0.8 0.72727273 0.89655172 0.92307692 0.80645161 0.70967742
|
|
0.76923077 0.73333333 0.68571429 0.92857143]
|
|
|
|
mean value: 0.7979880223595462
|
|
|
|
key: train_precision
|
|
value: [0.78985507 0.83141762 0.79787234 0.81617647 0.81617647 0.81439394
|
|
0.8057554 0.81003584 0.82783883 0.81560284]
|
|
|
|
mean value: 0.8125124820676404
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.72727273 0.8125 0.75 0.78125 0.6875
|
|
0.60606061 0.66666667 0.72727273 0.78787879]
|
|
|
|
mean value: 0.7273674242424243
|
|
|
|
key: train_recall
|
|
value: [0.7440273 0.74061433 0.76530612 0.75510204 0.75510204 0.73129252
|
|
0.76450512 0.77133106 0.77133106 0.78498294]
|
|
|
|
mean value: 0.7583594529962155
|
|
|
|
key: test_roc_auc
|
|
value: [0.77272727 0.72727273 0.86079545 0.84469697 0.79971591 0.70738636
|
|
0.7092803 0.70833333 0.69176136 0.86268939]
|
|
|
|
mean value: 0.7684659090909091
|
|
|
|
key: train_roc_auc
|
|
value: [0.77303754 0.79522184 0.78538344 0.79222679 0.79222679 0.78202851
|
|
0.79041583 0.79552947 0.80573356 0.80405609]
|
|
|
|
mean value: 0.7915859859302082
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.57142857 0.74285714 0.70588235 0.65789474 0.53658537
|
|
0.51282051 0.53658537 0.54545455 0.74285714]
|
|
|
|
mean value: 0.616775035229313
|
|
|
|
key: train_jcc
|
|
value: [0.62108262 0.64391691 0.64102564 0.64534884 0.64534884 0.62682216
|
|
0.64553314 0.65317919 0.66470588 0.66666667]
|
|
|
|
mean value: 0.6453629888889284
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.10872388 0.07221985 0.08031511 0.07965851 0.07820702 0.0832839
|
|
0.0768342 0.07378078 0.08042312 0.08786035]
|
|
|
|
mean value: 0.08213067054748535
|
|
|
|
key: score_time
|
|
value: [0.0114131 0.01115036 0.01119137 0.01129603 0.0114994 0.01149344
|
|
0.01123643 0.01188612 0.01131845 0.01106858]
|
|
|
|
mean value: 0.011355328559875488
|
|
|
|
key: test_mcc
|
|
value: [0.94112395 0.9701425 0.96969697 0.96969697 1. 0.96969697
|
|
0.96966868 0.96966868 0.94017476 0.96966868]
|
|
|
|
mean value: 0.9669538159281116
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.98484848 0.98461538 0.98461538 1. 0.98461538
|
|
0.98461538 0.98461538 0.96923077 0.98461538]
|
|
|
|
mean value: 0.9831468531468532
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97058824 0.98507463 0.98461538 0.98461538 1. 0.98461538
|
|
0.98507463 0.98507463 0.97058824 0.98507463]
|
|
|
|
mean value: 0.9835321131897076
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94285714 0.97058824 0.96969697 0.96969697 1. 0.96969697
|
|
0.97058824 0.97058824 0.94285714 0.97058824]
|
|
|
|
mean value: 0.9677158135981665
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96969697 0.98484848 0.98484848 0.98484848 1. 0.98484848
|
|
0.984375 0.984375 0.96875 0.984375 ]
|
|
|
|
mean value: 0.983096590909091
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94285714 0.97058824 0.96969697 0.96969697 1. 0.96969697
|
|
0.97058824 0.97058824 0.94285714 0.97058824]
|
|
|
|
mean value: 0.9677158135981665
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04492283 0.06440902 0.07062221 0.09072375 0.05423832 0.0812223
|
|
0.11205435 0.06770706 0.04995537 0.08655524]
|
|
|
|
mean value: 0.07224104404449463
|
|
|
|
key: score_time
|
|
value: [0.01941609 0.01222134 0.01941872 0.01229501 0.01221132 0.01934242
|
|
0.03134274 0.01228213 0.01945925 0.0196538 ]
|
|
|
|
mean value: 0.0177642822265625
|
|
|
|
key: test_mcc
|
|
value: [0.90950859 0.8196886 0.83005736 0.93844697 0.82191818 0.94028478
|
|
0.93844697 0.91144345 0.96966868 0.90814394]
|
|
|
|
mean value: 0.8987607527266495
|
|
|
|
key: train_mcc
|
|
value: [0.95221843 0.9488552 0.95232125 0.95238704 0.95913582 0.95232125
|
|
0.95238925 0.95232236 0.95571266 0.95584685]
|
|
|
|
mean value: 0.9533510091791351
|
|
|
|
key: test_accuracy
|
|
value: [0.95454545 0.90909091 0.90769231 0.96923077 0.90769231 0.96923077
|
|
0.96923077 0.95384615 0.98461538 0.95384615]
|
|
|
|
mean value: 0.9479020979020979
|
|
|
|
key: train_accuracy
|
|
value: [0.97610922 0.97440273 0.97614991 0.97614991 0.97955707 0.97614991
|
|
0.97614991 0.97614991 0.97785349 0.97785349]
|
|
|
|
mean value: 0.9766525574012593
|
|
|
|
key: test_fscore
|
|
value: [0.95522388 0.91176471 0.91428571 0.96875 0.91176471 0.96969697
|
|
0.96969697 0.95652174 0.98507463 0.95384615]
|
|
|
|
mean value: 0.9496625465883635
|
|
|
|
key: train_fscore
|
|
value: [0.97610922 0.97453311 0.97627119 0.97635135 0.97966102 0.97627119
|
|
0.97627119 0.97619048 0.97785349 0.97800338]
|
|
|
|
mean value: 0.9767515602219685
|
|
|
|
key: test_precision
|
|
value: [0.94117647 0.88571429 0.84210526 0.96875 0.86111111 0.94117647
|
|
0.96969697 0.91666667 0.97058824 0.96875 ]
|
|
|
|
mean value: 0.9265735472817516
|
|
|
|
key: train_precision
|
|
value: [0.97610922 0.96959459 0.97297297 0.96979866 0.97635135 0.97297297
|
|
0.96969697 0.97288136 0.97619048 0.96979866]
|
|
|
|
mean value: 0.9726367224164848
|
|
|
|
key: test_recall
|
|
value: [0.96969697 0.93939394 1. 0.96875 0.96875 1.
|
|
0.96969697 1. 1. 0.93939394]
|
|
|
|
mean value: 0.9755681818181818
|
|
|
|
key: train_recall
|
|
value: [0.97610922 0.97952218 0.97959184 0.9829932 0.9829932 0.97959184
|
|
0.98293515 0.97952218 0.97952218 0.98634812]
|
|
|
|
mean value: 0.9809129112395811
|
|
|
|
key: test_roc_auc
|
|
value: [0.95454545 0.90909091 0.90909091 0.96922348 0.90861742 0.96969697
|
|
0.96922348 0.953125 0.984375 0.95407197]
|
|
|
|
mean value: 0.9481060606060606
|
|
|
|
key: train_roc_auc
|
|
value: [0.97610922 0.97440273 0.97614404 0.97613824 0.97955121 0.97614404
|
|
0.97616145 0.97615565 0.97785633 0.97786794]
|
|
|
|
mean value: 0.9766530844419679
|
|
|
|
key: test_jcc
|
|
value: [0.91428571 0.83783784 0.84210526 0.93939394 0.83783784 0.94117647
|
|
0.94117647 0.91666667 0.97058824 0.91176471]
|
|
|
|
mean value: 0.9052833141532832
|
|
|
|
key: train_jcc
|
|
value: [0.95333333 0.95033113 0.95364238 0.95379538 0.96013289 0.95364238
|
|
0.95364238 0.95348837 0.95666667 0.95695364]
|
|
|
|
mean value: 0.9545628562526227
|
|
|
|
MCC on Blind test: 0.82
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01583385 0.01057649 0.01054573 0.01007676 0.0101521 0.0101788
|
|
0.01006889 0.01036167 0.01008558 0.01018763]
|
|
|
|
mean value: 0.010806751251220704
|
|
|
|
key: score_time
|
|
value: [0.01199889 0.00923753 0.00887418 0.00877666 0.00880337 0.00871682
|
|
0.00870228 0.00878859 0.00886726 0.00878167]
|
|
|
|
mean value: 0.009154725074768066
|
|
|
|
key: test_mcc
|
|
value: [0.5768179 0.55815631 0.56211492 0.72572613 0.4983497 0.60037879
|
|
0.51782513 0.3844697 0.50807424 0.69326017]
|
|
|
|
mean value: 0.5625172996514779
|
|
|
|
key: train_mcc
|
|
value: [0.57434639 0.60444771 0.59275601 0.61358451 0.61163099 0.61928315
|
|
0.59939415 0.61305297 0.59584164 0.61988238]
|
|
|
|
mean value: 0.6044219886260209
|
|
|
|
key: test_accuracy
|
|
value: [0.78787879 0.77272727 0.76923077 0.86153846 0.73846154 0.8
|
|
0.75384615 0.69230769 0.75384615 0.84615385]
|
|
|
|
mean value: 0.7775990675990676
|
|
|
|
key: train_accuracy
|
|
value: [0.78668942 0.80204778 0.7955707 0.80579216 0.80579216 0.80919932
|
|
0.79897785 0.80579216 0.79727428 0.80919932]
|
|
|
|
mean value: 0.8016335157072172
|
|
|
|
key: test_fscore
|
|
value: [0.79411765 0.79452055 0.79452055 0.85245902 0.76712329 0.8
|
|
0.73333333 0.6969697 0.76470588 0.85294118]
|
|
|
|
mean value: 0.785069113614047
|
|
|
|
key: train_fscore
|
|
value: [0.79270315 0.80536913 0.80327869 0.81372549 0.80743243 0.81456954
|
|
0.80528053 0.81188119 0.80330579 0.81518152]
|
|
|
|
mean value: 0.8072727445453226
|
|
|
|
key: test_precision
|
|
value: [0.77142857 0.725 0.70731707 0.89655172 0.68292683 0.78787879
|
|
0.81481481 0.6969697 0.74285714 0.82857143]
|
|
|
|
mean value: 0.7654316069097398
|
|
|
|
key: train_precision
|
|
value: [0.77096774 0.79207921 0.77531646 0.78301887 0.80201342 0.79354839
|
|
0.77955272 0.78594249 0.77884615 0.78913738]
|
|
|
|
mean value: 0.7850422825098152
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.87878788 0.90625 0.8125 0.875 0.8125
|
|
0.66666667 0.6969697 0.78787879 0.87878788]
|
|
|
|
mean value: 0.8133522727272727
|
|
|
|
key: train_recall
|
|
value: [0.81569966 0.81911263 0.83333333 0.84693878 0.81292517 0.83673469
|
|
0.83276451 0.83959044 0.82935154 0.84300341]
|
|
|
|
mean value: 0.8309454157089458
|
|
|
|
key: test_roc_auc
|
|
value: [0.78787879 0.77272727 0.77130682 0.86079545 0.7405303 0.80018939
|
|
0.75520833 0.69223485 0.75331439 0.84564394]
|
|
|
|
mean value: 0.7779829545454546
|
|
|
|
key: train_roc_auc
|
|
value: [0.78668942 0.80204778 0.79550626 0.80572195 0.80577999 0.80915233
|
|
0.79903531 0.80584964 0.79732883 0.80925681]
|
|
|
|
mean value: 0.8016368322072857
|
|
|
|
key: test_jcc
|
|
value: [0.65853659 0.65909091 0.65909091 0.74285714 0.62222222 0.66666667
|
|
0.57894737 0.53488372 0.61904762 0.74358974]
|
|
|
|
mean value: 0.6484932887282351
|
|
|
|
key: train_jcc
|
|
value: [0.65659341 0.6741573 0.67123288 0.68595041 0.67705382 0.68715084
|
|
0.67403315 0.68333333 0.67127072 0.68802228]
|
|
|
|
mean value: 0.6768798147110306
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02534986 0.02770162 0.0270679 0.02558327 0.02563214 0.0259738
|
|
0.03067756 0.02564144 0.03274512 0.02606511]
|
|
|
|
mean value: 0.027243781089782714
|
|
|
|
key: score_time
|
|
value: [0.01025176 0.01149607 0.01207352 0.01202536 0.01203775 0.01206684
|
|
0.01200747 0.01207829 0.0120697 0.01209188]
|
|
|
|
mean value: 0.011819863319396972
|
|
|
|
key: test_mcc
|
|
value: [0.90950859 0.84887469 0.88382395 0.90814394 0.84995597 0.94028478
|
|
0.94028478 0.82191818 0.91144345 0.91168461]
|
|
|
|
mean value: 0.8925922950912009
|
|
|
|
key: train_mcc
|
|
value: [0.96596033 0.94559077 0.93552737 0.96252656 0.95913582 0.95920405
|
|
0.92935637 0.91635122 0.95920405 0.95913582]
|
|
|
|
mean value: 0.9491992348323932
|
|
|
|
key: test_accuracy
|
|
value: [0.95454545 0.92424242 0.93846154 0.95384615 0.92307692 0.96923077
|
|
0.96923077 0.90769231 0.95384615 0.95384615]
|
|
|
|
mean value: 0.9448018648018648
|
|
|
|
key: train_accuracy
|
|
value: [0.98293515 0.97269625 0.96763203 0.98126065 0.97955707 0.97955707
|
|
0.96422487 0.95741056 0.97955707 0.97955707]
|
|
|
|
mean value: 0.9744387787733079
|
|
|
|
key: test_fscore
|
|
value: [0.95384615 0.92307692 0.94117647 0.95384615 0.92537313 0.96969697
|
|
0.96875 0.90322581 0.95652174 0.95238095]
|
|
|
|
mean value: 0.9447894303345794
|
|
|
|
key: train_fscore
|
|
value: [0.98281787 0.97241379 0.96806723 0.98132428 0.97966102 0.97945205
|
|
0.96335079 0.95606327 0.97966102 0.97945205]
|
|
|
|
mean value: 0.9742263365568498
|
|
|
|
key: test_precision
|
|
value: [0.96875 0.9375 0.88888889 0.93939394 0.88571429 0.94117647
|
|
1. 0.96551724 0.91666667 1. ]
|
|
|
|
mean value: 0.9443607492631326
|
|
|
|
key: train_precision
|
|
value: [0.98961938 0.9825784 0.95681063 0.97966102 0.97635135 0.9862069
|
|
0.98571429 0.98550725 0.97306397 0.98281787]
|
|
|
|
mean value: 0.9798331045027515
|
|
|
|
key: test_recall
|
|
value: [0.93939394 0.90909091 1. 0.96875 0.96875 1.
|
|
0.93939394 0.84848485 1. 0.90909091]
|
|
|
|
mean value: 0.9482954545454545
|
|
|
|
key: train_recall
|
|
value: [0.97610922 0.96245734 0.97959184 0.9829932 0.9829932 0.97278912
|
|
0.94197952 0.92832765 0.98634812 0.97610922]
|
|
|
|
mean value: 0.9689698404959253
|
|
|
|
key: test_roc_auc
|
|
value: [0.95454545 0.92424242 0.93939394 0.95407197 0.92376894 0.96969697
|
|
0.96969697 0.90861742 0.953125 0.95454545]
|
|
|
|
mean value: 0.9451704545454546
|
|
|
|
key: train_roc_auc
|
|
value: [0.98293515 0.97269625 0.96761162 0.98125769 0.97955121 0.97956862
|
|
0.96418704 0.9573611 0.97956862 0.97955121]
|
|
|
|
mean value: 0.9744288500383088
|
|
|
|
key: test_jcc
|
|
value: [0.91176471 0.85714286 0.88888889 0.91176471 0.86111111 0.94117647
|
|
0.93939394 0.82352941 0.91666667 0.90909091]
|
|
|
|
mean value: 0.8960529666412019
|
|
|
|
key: train_jcc
|
|
value: [0.96621622 0.94630872 0.93811075 0.96333333 0.96013289 0.95973154
|
|
0.92929293 0.91582492 0.96013289 0.95973154]
|
|
|
|
mean value: 0.9498815736664497
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01519728 0.02194762 0.02023983 0.02044773 0.02418399 0.02258992
|
|
0.02171874 0.02219558 0.01945853 0.02303457]
|
|
|
|
mean value: 0.02110137939453125
|
|
|
|
key: score_time
|
|
value: [0.01065183 0.01201725 0.01206088 0.01222491 0.01207542 0.01208949
|
|
0.01208282 0.01207709 0.01206946 0.0120399 ]
|
|
|
|
mean value: 0.011938905715942383
|
|
|
|
key: test_mcc
|
|
value: [0.90950859 0.90950859 0.56270396 0.84644588 0.93844697 0.91168461
|
|
0.63287203 0.90814394 0.84644588 0.91168461]
|
|
|
|
mean value: 0.8377445066997006
|
|
|
|
key: train_mcc
|
|
value: [0.91913857 0.9217648 0.62108455 0.94212842 0.96265981 0.97274268
|
|
0.72617124 0.90990542 0.95238925 0.96592835]
|
|
|
|
mean value: 0.889391309620473
|
|
|
|
key: test_accuracy
|
|
value: [0.95454545 0.95454545 0.73846154 0.92307692 0.96923077 0.95384615
|
|
0.78461538 0.95384615 0.92307692 0.95384615]
|
|
|
|
mean value: 0.9109090909090909
|
|
|
|
key: train_accuracy
|
|
value: [0.95904437 0.96075085 0.77853492 0.97103918 0.98126065 0.98637138
|
|
0.85008518 0.95400341 0.97614991 0.98296422]
|
|
|
|
mean value: 0.940020408044607
|
|
|
|
key: test_fscore
|
|
value: [0.95522388 0.95522388 0.79012346 0.92063492 0.96875 0.95522388
|
|
0.73076923 0.95384615 0.92537313 0.95238095]
|
|
|
|
mean value: 0.9107549490540785
|
|
|
|
key: train_fscore
|
|
value: [0.96 0.96121417 0.8189415 0.97094017 0.98145025 0.98639456
|
|
0.82677165 0.95238095 0.97627119 0.98293515]
|
|
|
|
mean value: 0.9417299597102607
|
|
|
|
key: test_precision
|
|
value: [0.94117647 0.94117647 0.65306122 0.93548387 0.96875 0.91428571
|
|
1. 0.96875 0.91176471 1. ]
|
|
|
|
mean value: 0.9234448456802076
|
|
|
|
key: train_precision
|
|
value: [0.93811075 0.95 0.69339623 0.97594502 0.97324415 0.98639456
|
|
0.97674419 0.98540146 0.96969697 0.98293515]
|
|
|
|
mean value: 0.9431868466944326
|
|
|
|
key: test_recall
|
|
value: [0.96969697 0.96969697 1. 0.90625 0.96875 1.
|
|
0.57575758 0.93939394 0.93939394 0.90909091]
|
|
|
|
mean value: 0.9178030303030303
|
|
|
|
key: train_recall
|
|
value: [0.98293515 0.97269625 1. 0.96598639 0.98979592 0.98639456
|
|
0.71672355 0.92150171 0.98293515 0.98293515]
|
|
|
|
mean value: 0.9501903833205637
|
|
|
|
key: test_roc_auc
|
|
value: [0.95454545 0.95454545 0.74242424 0.92282197 0.96922348 0.95454545
|
|
0.78787879 0.95407197 0.92282197 0.95454545]
|
|
|
|
mean value: 0.9117424242424242
|
|
|
|
key: train_roc_auc
|
|
value: [0.95904437 0.96075085 0.778157 0.9710478 0.98124608 0.98637134
|
|
0.84985837 0.95394813 0.97616145 0.98296418]
|
|
|
|
mean value: 0.939954958092452
|
|
|
|
key: test_jcc
|
|
value: [0.91428571 0.91428571 0.65306122 0.85294118 0.93939394 0.91428571
|
|
0.57575758 0.91176471 0.86111111 0.90909091]
|
|
|
|
mean value: 0.8445977785053416
|
|
|
|
key: train_jcc
|
|
value: [0.92307692 0.92532468 0.69339623 0.94352159 0.96357616 0.97315436
|
|
0.70469799 0.90909091 0.95364238 0.96644295]
|
|
|
|
mean value: 0.8955924173651768
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.19408345 0.18791676 0.19393516 0.1892693 0.18968463 0.1914618
|
|
0.19001937 0.18625164 0.19071054 0.19217348]
|
|
|
|
mean value: 0.1905506134033203
|
|
|
|
key: score_time
|
|
value: [0.01648283 0.01540661 0.01589441 0.01590419 0.01571655 0.01537204
|
|
0.01550341 0.01552963 0.0156188 0.01553798]
|
|
|
|
mean value: 0.01569664478302002
|
|
|
|
key: test_mcc
|
|
value: [0.9701425 0.9701425 0.96969697 0.96969697 1. 0.96969697
|
|
1. 0.96966868 0.94017476 0.96966868]
|
|
|
|
mean value: 0.9728888029265946
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98484848 0.98484848 0.98461538 0.98461538 1. 0.98461538
|
|
1. 0.98461538 0.96923077 0.98461538]
|
|
|
|
mean value: 0.9862004662004662
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98507463 0.98507463 0.98461538 0.98461538 1. 0.98461538
|
|
1. 0.98507463 0.97058824 0.98507463]
|
|
|
|
mean value: 0.9864732896602958
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.97058824 0.97058824 0.96969697 0.96969697 1. 0.96969697
|
|
1. 0.97058824 0.94285714 0.97058824]
|
|
|
|
mean value: 0.9734300993124523
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98484848 0.98484848 0.98484848 0.98484848 1. 0.98484848
|
|
1. 0.984375 0.96875 0.984375 ]
|
|
|
|
mean value: 0.9861742424242425
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.97058824 0.97058824 0.96969697 0.96969697 1. 0.96969697
|
|
1. 0.97058824 0.94285714 0.97058824]
|
|
|
|
mean value: 0.9734300993124523
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.82
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07466435 0.08926511 0.08730602 0.09664154 0.0819788 0.07781601
|
|
0.09088039 0.08559895 0.08477879 0.07983494]
|
|
|
|
mean value: 0.08487648963928222
|
|
|
|
key: score_time
|
|
value: [0.0218389 0.02159381 0.01945972 0.03571534 0.01717114 0.03562188
|
|
0.03001285 0.03977084 0.03206062 0.0384841 ]
|
|
|
|
mean value: 0.0291729211807251
|
|
|
|
key: test_mcc
|
|
value: [0.90950859 1. 0.96969697 0.96969697 1. 0.96969697
|
|
1. 0.96966868 0.91144345 0.94017476]
|
|
|
|
mean value: 0.9639886393074825
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.99320865 0.9965986 1. 0.9965986
|
|
0.99659864 0.99320881 0.99659864 0.98983039]
|
|
|
|
mean value: 0.9962642330617676
|
|
|
|
key: test_accuracy
|
|
value: [0.95454545 1. 0.98461538 0.98461538 1. 0.98461538
|
|
1. 0.98461538 0.95384615 0.96923077]
|
|
|
|
mean value: 0.9816083916083916
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.99659284 0.99829642 1. 0.99829642
|
|
0.99829642 0.99659284 0.99829642 0.99488927]
|
|
|
|
mean value: 0.9981260647359455
|
|
|
|
key: test_fscore
|
|
value: [0.95522388 1. 0.98461538 0.98461538 1. 0.98461538
|
|
1. 0.98507463 0.95652174 0.97058824]
|
|
|
|
mean value: 0.9821254635733393
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.99661017 0.99830221 1. 0.99830221
|
|
0.99829642 0.99659864 0.99829642 0.99490662]
|
|
|
|
mean value: 0.9981312689575405
|
|
|
|
key: test_precision
|
|
value: [0.94117647 1. 0.96969697 0.96969697 1. 0.96969697
|
|
1. 0.97058824 0.91666667 0.94285714]
|
|
|
|
mean value: 0.9680379424497072
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.99324324 0.99661017 1. 0.99661017
|
|
0.99659864 0.99322034 0.99659864 0.98986486]
|
|
|
|
mean value: 0.9962746064985775
|
|
|
|
key: test_recall
|
|
value: [0.96969697 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
0.996969696969697
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95454545 1. 0.98484848 0.98484848 1. 0.98484848
|
|
1. 0.984375 0.953125 0.96875 ]
|
|
|
|
mean value: 0.9815340909090909
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.99658703 0.99829352 1. 0.99829352
|
|
0.99829932 0.99659864 0.99829932 0.99489796]
|
|
|
|
mean value: 0.9981269299528686
|
|
|
|
key: test_jcc
|
|
value: [0.91428571 1. 0.96969697 0.96969697 1. 0.96969697
|
|
1. 0.97058824 0.91666667 0.94285714]
|
|
|
|
mean value: 0.9653488668194551
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.99324324 0.99661017 1. 0.99661017
|
|
0.99659864 0.99322034 0.99659864 0.98986486]
|
|
|
|
mean value: 0.9962746064985775
|
|
|
|
MCC on Blind test: 0.82
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.26663494 0.18799305 0.22161603 0.20603728 0.25028539 0.19107294
|
|
0.25472808 0.27957559 0.28712082 0.25773311]
|
|
|
|
mean value: 0.2402797222137451
|
|
|
|
key: score_time
|
|
value: [0.01788664 0.03220916 0.01656365 0.02937508 0.02889848 0.01650047
|
|
0.02786303 0.02866268 0.03101921 0.02904153]
|
|
|
|
mean value: 0.025801992416381835
|
|
|
|
key: test_mcc
|
|
value: [0.88531564 0.78086881 0.85663571 0.96969697 0.84995597 0.85663571
|
|
0.91144345 0.88340557 0.77695466 0.88340557]
|
|
|
|
mean value: 0.8654318051752643
|
|
|
|
key: train_mcc
|
|
value: [0.9830783 0.98981298 0.98646265 0.98983004 0.98983004 0.99320865
|
|
0.98983039 0.98646327 0.98310733 0.98646327]
|
|
|
|
mean value: 0.9878086916994873
|
|
|
|
key: test_accuracy
|
|
value: [0.93939394 0.87878788 0.92307692 0.98461538 0.92307692 0.92307692
|
|
0.95384615 0.93846154 0.87692308 0.93846154]
|
|
|
|
mean value: 0.927972027972028
|
|
|
|
key: train_accuracy
|
|
value: [0.99146758 0.99488055 0.99318569 0.99488927 0.99488927 0.99659284
|
|
0.99488927 0.99318569 0.99148211 0.99318569]
|
|
|
|
mean value: 0.9938647952509143
|
|
|
|
key: test_fscore
|
|
value: [0.94285714 0.89189189 0.92753623 0.98461538 0.92537313 0.92753623
|
|
0.95652174 0.94285714 0.89189189 0.94285714]
|
|
|
|
mean value: 0.9333937934197506
|
|
|
|
key: train_fscore
|
|
value: [0.99153976 0.99490662 0.99324324 0.99492386 0.99492386 0.99661017
|
|
0.99490662 0.99322034 0.99153976 0.99322034]
|
|
|
|
mean value: 0.9939034575448025
|
|
|
|
key: test_precision
|
|
value: [0.89189189 0.80487805 0.86486486 0.96969697 0.88571429 0.86486486
|
|
0.91666667 0.89189189 0.80487805 0.89189189]
|
|
|
|
mean value: 0.8787239425044303
|
|
|
|
key: train_precision
|
|
value: [0.98322148 0.98986486 0.98657718 0.98989899 0.98989899 0.99324324
|
|
0.98986486 0.98653199 0.98322148 0.98653199]
|
|
|
|
mean value: 0.9878855060063114
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.96875 1. 1. 1. 1.
|
|
1. ]
|
|
|
|
mean value: 0.996875
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.93939394 0.87878788 0.92424242 0.98484848 0.92376894 0.92424242
|
|
0.953125 0.9375 0.875 0.9375 ]
|
|
|
|
mean value: 0.9278409090909091
|
|
|
|
key: train_roc_auc
|
|
value: [0.99146758 0.99488055 0.99317406 0.99488055 0.99488055 0.99658703
|
|
0.99489796 0.99319728 0.9914966 0.99319728]
|
|
|
|
mean value: 0.9938659422813494
|
|
|
|
key: test_jcc
|
|
value: [0.89189189 0.80487805 0.86486486 0.96969697 0.86111111 0.86486486
|
|
0.91666667 0.89189189 0.80487805 0.89189189]
|
|
|
|
mean value: 0.8762636250441129
|
|
|
|
key: train_jcc
|
|
value: [0.98322148 0.98986486 0.98657718 0.98989899 0.98989899 0.99324324
|
|
0.98986486 0.98653199 0.98322148 0.98653199]
|
|
|
|
mean value: 0.9878855060063114
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.76841474 0.74795699 0.74724698 0.76810884 0.7429924 0.75336194
|
|
0.76922297 0.75542974 0.76049519 0.74811101]
|
|
|
|
mean value: 0.7561340808868409
|
|
|
|
key: score_time
|
|
value: [0.00952291 0.00938296 0.00974011 0.00956559 0.0093658 0.00958729
|
|
0.01015449 0.00941133 0.00982475 0.00931835]
|
|
|
|
mean value: 0.009587359428405762
|
|
|
|
key: test_mcc
|
|
value: [0.94112395 0.9701425 0.96969697 0.96969697 1. 0.96969697
|
|
0.96966868 0.96966868 0.91144345 0.94017476]
|
|
|
|
mean value: 0.9611312929494409
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.98484848 0.98461538 0.98461538 1. 0.98461538
|
|
0.98461538 0.98461538 0.95384615 0.96923077]
|
|
|
|
mean value: 0.9800699300699302
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97058824 0.98507463 0.98461538 0.98461538 1. 0.98461538
|
|
0.98507463 0.98507463 0.95652174 0.97058824]
|
|
|
|
mean value: 0.9806768244161839
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94285714 0.97058824 0.96969697 0.96969697 1. 0.96969697
|
|
0.97058824 0.97058824 0.91666667 0.94285714]
|
|
|
|
mean value: 0.9623236567354214
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96969697 0.98484848 0.98484848 0.98484848 1. 0.98484848
|
|
0.984375 0.984375 0.953125 0.96875 ]
|
|
|
|
mean value: 0.9799715909090909
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94285714 0.97058824 0.96969697 0.96969697 1. 0.96969697
|
|
0.97058824 0.97058824 0.91666667 0.94285714]
|
|
|
|
mean value: 0.9623236567354214
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0343082 0.03298187 0.03278327 0.03844833 0.03307199 0.03226948
|
|
0.03353143 0.03394914 0.03774524 0.04404974]
|
|
|
|
mean value: 0.035313868522644044
|
|
|
|
key: score_time
|
|
value: [0.01221323 0.01314235 0.01495123 0.01498508 0.01509428 0.0152576
|
|
0.03477526 0.01318979 0.01854706 0.02858615]
|
|
|
|
mean value: 0.01807420253753662
|
|
|
|
key: test_mcc
|
|
value: [0.9701425 0.90950859 1. 1. 0.90814394 0.96969697
|
|
0.96966868 0.96966868 1. 1. ]
|
|
|
|
mean value: 0.9696829367214851
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98484848 0.95454545 1. 1. 0.95384615 0.98461538
|
|
0.98461538 0.98461538 1. 1. ]
|
|
|
|
mean value: 0.9847086247086247
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98461538 0.95384615 1. 1. 0.95384615 0.98461538
|
|
0.98507463 0.98507463 1. 1. ]
|
|
|
|
mean value: 0.9847072330654421
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.96875 1. 1. 0.93939394 0.96969697
|
|
0.97058824 0.97058824 1. 1. ]
|
|
|
|
mean value: 0.9819017379679145
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96969697 0.93939394 1. 1. 0.96875 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9877840909090909
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98484848 0.95454545 1. 1. 0.95407197 0.98484848
|
|
0.984375 0.984375 1. 1. ]
|
|
|
|
mean value: 0.9847064393939394
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96969697 0.91176471 1. 1. 0.91176471 0.96969697
|
|
0.97058824 0.97058824 1. 1. ]
|
|
|
|
mean value: 0.9704099821746881
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02863717 0.04037857 0.04070926 0.03674984 0.04031229 0.04035211
|
|
0.0404079 0.04057741 0.04059529 0.04037046]
|
|
|
|
mean value: 0.03890902996063232
|
|
|
|
key: score_time
|
|
value: [0.01908898 0.01911449 0.0154767 0.01687717 0.0188179 0.01907849
|
|
0.01894832 0.02247143 0.01914191 0.01913929]
|
|
|
|
mean value: 0.01881546974182129
|
|
|
|
key: test_mcc
|
|
value: [0.90950859 0.87878788 0.88382395 0.93844697 0.84995597 0.94028478
|
|
0.96969697 0.96966868 0.84644588 0.90814394]
|
|
|
|
mean value: 0.9094763613775467
|
|
|
|
key: train_mcc
|
|
value: [0.94881099 0.9488552 0.94208333 0.94212641 0.94889774 0.93869211
|
|
0.94550795 0.942084 0.95575756 0.95584685]
|
|
|
|
mean value: 0.9468662121748642
|
|
|
|
key: test_accuracy
|
|
value: [0.95454545 0.93939394 0.93846154 0.96923077 0.92307692 0.96923077
|
|
0.98461538 0.98461538 0.92307692 0.95384615]
|
|
|
|
mean value: 0.954009324009324
|
|
|
|
key: train_accuracy
|
|
value: [0.97440273 0.97440273 0.97103918 0.97103918 0.97444634 0.9693356
|
|
0.97274276 0.97103918 0.97785349 0.97785349]
|
|
|
|
mean value: 0.9734154694140972
|
|
|
|
key: test_fscore
|
|
value: [0.95522388 0.93939394 0.94117647 0.96875 0.92537313 0.96969697
|
|
0.98461538 0.98507463 0.92537313 0.95384615]
|
|
|
|
mean value: 0.9548523694260086
|
|
|
|
key: train_fscore
|
|
value: [0.97444634 0.97453311 0.97113752 0.97123519 0.97453311 0.96949153
|
|
0.97278912 0.97103918 0.97792869 0.97800338]
|
|
|
|
mean value: 0.9735137167185135
|
|
|
|
key: test_precision
|
|
value: [0.94117647 0.93939394 0.88888889 0.96875 0.88571429 0.94117647
|
|
1. 0.97058824 0.91176471 0.96875 ]
|
|
|
|
mean value: 0.9416202996350055
|
|
|
|
key: train_precision
|
|
value: [0.97278912 0.96959459 0.96949153 0.96632997 0.97288136 0.96621622
|
|
0.96949153 0.96938776 0.97297297 0.96979866]
|
|
|
|
mean value: 0.9698953685359831
|
|
|
|
key: test_recall
|
|
value: [0.96969697 0.93939394 1. 0.96875 0.96875 1.
|
|
0.96969697 1. 0.93939394 0.93939394]
|
|
|
|
mean value: 0.9695075757575757
|
|
|
|
key: train_recall/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_sl.py:188: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_sl.py:191: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
|
|
value: [0.97610922 0.97952218 0.97278912 0.97619048 0.97619048 0.97278912
|
|
0.97610922 0.97269625 0.98293515 0.98634812]
|
|
|
|
mean value: 0.977167932019224
|
|
|
|
key: test_roc_auc
|
|
value: [0.95454545 0.93939394 0.93939394 0.96922348 0.92376894 0.96969697
|
|
0.98484848 0.984375 0.92282197 0.95407197]
|
|
|
|
mean value: 0.9542140151515152
|
|
|
|
key: train_roc_auc
|
|
value: [0.97440273 0.97440273 0.9710362 0.97103039 0.97444336 0.96932971
|
|
0.97274849 0.971042 0.97786213 0.97786794]
|
|
|
|
mean value: 0.9734165679923846
|
|
|
|
key: test_jcc
|
|
value: [0.91428571 0.88571429 0.88888889 0.93939394 0.86111111 0.94117647
|
|
0.96969697 0.97058824 0.86111111 0.91176471]
|
|
|
|
mean value: 0.9143731431966726
|
|
|
|
key: train_jcc
|
|
value: [0.95016611 0.95033113 0.94389439 0.94407895 0.95033113 0.94078947
|
|
0.94701987 0.94370861 0.95681063 0.95695364]
|
|
|
|
mean value: 0.9484083925538549
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.28756905 0.30973029 0.30490494 0.31140661 0.35169911 0.40486741
|
|
0.30568242 0.30639338 0.30918193 0.30819392]
|
|
|
|
mean value: 0.31996290683746337
|
|
|
|
key: score_time
|
|
value: [0.01921129 0.01902771 0.01945329 0.01908779 0.02094746 0.01899981
|
|
0.01919055 0.01918483 0.01925635 0.01911736]
|
|
|
|
mean value: 0.019347643852233885
|
|
|
|
key: test_mcc
|
|
value: [0.90950859 0.87878788 0.88382395 0.93844697 0.84995597 0.94028478
|
|
0.96969697 0.94017476 0.84644588 0.90814394]
|
|
|
|
mean value: 0.9065269687521299
|
|
|
|
key: train_mcc
|
|
value: [0.94881099 0.9488552 0.94208333 0.94212641 0.94889774 0.93869211
|
|
0.94550795 0.95232236 0.95575756 0.95584685]
|
|
|
|
mean value: 0.9478900476185305
|
|
|
|
key: test_accuracy
|
|
value: [0.95454545 0.93939394 0.93846154 0.96923077 0.92307692 0.96923077
|
|
0.98461538 0.96923077 0.92307692 0.95384615]
|
|
|
|
mean value: 0.9524708624708625
|
|
|
|
key: train_accuracy
|
|
value: [0.97440273 0.97440273 0.97103918 0.97103918 0.97444634 0.9693356
|
|
0.97274276 0.97614991 0.97785349 0.97785349]
|
|
|
|
mean value: 0.9739265426679302
|
|
|
|
key: test_fscore
|
|
value: [0.95522388 0.93939394 0.94117647 0.96875 0.92537313 0.96969697
|
|
0.98461538 0.97058824 0.92537313 0.95384615]
|
|
|
|
mean value: 0.9534037302688532
|
|
|
|
key: train_fscore
|
|
value: [0.97444634 0.97453311 0.97113752 0.97123519 0.97453311 0.96949153
|
|
0.97278912 0.97619048 0.97792869 0.97800338]
|
|
|
|
mean value: 0.9740288461092816
|
|
|
|
key: test_precision
|
|
value: [0.94117647 0.93939394 0.88888889 0.96875 0.88571429 0.94117647
|
|
1. 0.94285714 0.91176471 0.96875 ]
|
|
|
|
mean value: 0.938847190391308
|
|
|
|
key: train_precision
|
|
value: [0.97278912 0.96959459 0.96949153 0.96632997 0.97288136 0.96621622
|
|
0.96949153 0.97288136 0.97297297 0.96979866]
|
|
|
|
mean value: 0.9702447286189994
|
|
|
|
key: test_recall
|
|
value: [0.96969697 0.93939394 1. 0.96875 0.96875 1.
|
|
0.96969697 1. 0.93939394 0.93939394]
|
|
|
|
mean value: 0.9695075757575757
|
|
|
|
key: train_recall
|
|
value: [0.97610922 0.97952218 0.97278912 0.97619048 0.97619048 0.97278912
|
|
0.97610922 0.97952218 0.98293515 0.98634812]
|
|
|
|
mean value: 0.9778505258758794
|
|
|
|
key: test_roc_auc
|
|
value: [0.95454545 0.93939394 0.93939394 0.96922348 0.92376894 0.96969697
|
|
0.98484848 0.96875 0.92282197 0.95407197]
|
|
|
|
mean value: 0.9526515151515151
|
|
|
|
key: train_roc_auc
|
|
value: [0.97440273 0.97440273 0.9710362 0.97103039 0.97444336 0.96932971
|
|
0.97274849 0.97615565 0.97786213 0.97786794]
|
|
|
|
mean value: 0.9739279329479232
|
|
|
|
key: test_jcc
|
|
value: [0.91428571 0.88571429 0.88888889 0.93939394 0.86111111 0.94117647
|
|
0.96969697 0.94285714 0.86111111 0.91176471]
|
|
|
|
mean value: 0.9116000339529752
|
|
|
|
key: train_jcc
|
|
value: [0.95016611 0.95033113 0.94389439 0.94407895 0.95033113 0.94078947
|
|
0.94701987 0.95348837 0.95681063 0.95695364]
|
|
|
|
mean value: 0.9493863688360049
|
|
|
|
MCC on Blind test: 0.92
|
|
|
|
Accuracy on Blind test: 0.97
|