19521 lines
952 KiB
Text
19521 lines
952 KiB
Text
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_8020.py:549: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 817
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 817
|
|
ncols: 269
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 244
|
|
log10_or_mychisq 244
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 168
|
|
No. of categorical features: 7
|
|
|
|
PASS: x_features has no target variable
|
|
|
|
No. of columns for x_features: 175
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data with stratification: 80/20
|
|
Train data size: (373, 175)
|
|
Test data size: (94, 175)
|
|
y_train numbers: Counter({1: 247, 0: 126})
|
|
y_train ratio: 0.5101214574898786
|
|
|
|
y_test_numbers: Counter({1: 62, 0: 32})
|
|
y_test ratio: 0.5161290322580645
|
|
-------------------------------------------------------------
|
|
|
|
Simple Random OverSampling
|
|
Counter({1: 247, 0: 247})
|
|
(494, 175)
|
|
|
|
Simple Random UnderSampling
|
|
Counter({0: 126, 1: 126})
|
|
(252, 175)
|
|
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 247, 1: 247})
|
|
(494, 175)
|
|
|
|
SMOTE_NC OverSampling
|
|
Counter({1: 247, 0: 247})
|
|
(494, 175)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis: 80/20 split
|
|
Gene name: katG
|
|
Drug name: isoniazid
|
|
|
|
Output directory: /home/tanu/git/Data/isoniazid/output/ml/tts_8020/
|
|
Sanity checks:
|
|
ML source data size: (467, 175)
|
|
Total input features: (373, 175)
|
|
Target feature numbers: Counter({1: 247, 0: 126})
|
|
Target features ratio: 0.5101214574898786
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 36
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'mcsm_ppi2_affinity', 'interface_dist']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0326004 0.0351243 0.03430986 0.0350132 0.03494811 0.03449178
|
|
0.03495932 0.03537345 0.03605509 0.03457975]
|
|
|
|
mean value: 0.034745526313781736
|
|
|
|
key: score_time
|
|
value: [0.0122869 0.01178741 0.01324797 0.01338649 0.0133276 0.01330996
|
|
0.01321411 0.01316428 0.01336861 0.01324463]
|
|
|
|
mean value: 0.013033795356750488
|
|
|
|
key: test_mcc
|
|
value: [0.89293523 0.7725393 0.70344321 0.50666667 0.93883452 0.75809804
|
|
0.87841046 0.50641992 0.82689823 0.77032889]
|
|
|
|
mean value: 0.7554574471705212
|
|
|
|
key: train_mcc
|
|
value: [0.81851165 0.8453441 0.84580011 0.82631713 0.79865798 0.83297615
|
|
0.83297615 0.83884723 0.83157753 0.85324818]
|
|
|
|
mean value: 0.8324256204932131
|
|
|
|
key: test_accuracy
|
|
value: [0.94736842 0.89473684 0.86842105 0.78378378 0.97297297 0.89189189
|
|
0.94594595 0.78378378 0.91891892 0.89189189]
|
|
|
|
mean value: 0.8899715504978662
|
|
|
|
key: train_accuracy
|
|
value: [0.91940299 0.93134328 0.93134328 0.92261905 0.91071429 0.92559524
|
|
0.92559524 0.92857143 0.92559524 0.93452381]
|
|
|
|
mean value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
0.9255303837953092
|
|
|
|
key: test_fscore
|
|
value: [0.95833333 0.92592593 0.90196078 0.84 0.98039216 0.92592593
|
|
0.96153846 0.84615385 0.94117647 0.92307692]
|
|
|
|
mean value: 0.9204483827719122
|
|
|
|
key: train_fscore
|
|
value: [0.94143167 0.94967177 0.94989107 0.94347826 0.93478261 0.94553377
|
|
0.94553377 0.94782609 0.9452954 0.95238095]
|
|
|
|
mean value: 0.9455825362092063
|
|
|
|
key: test_precision
|
|
value: [1. 0.86206897 0.88461538 0.84 0.96153846 0.86206897
|
|
0.92592593 0.78571429 0.88888889 0.85714286]
|
|
|
|
mean value: 0.8867963734860287
|
|
|
|
key: train_precision
|
|
value: [0.90794979 0.92340426 0.91983122 0.91176471 0.90336134 0.91561181
|
|
0.91561181 0.91983122 0.92307692 0.92050209]
|
|
|
|
mean value: 0.9160945187610795
|
|
|
|
key: test_recall
|
|
value: [0.92 1. 0.92 0.84 1. 1.
|
|
1. 0.91666667 1. 1. ]
|
|
|
|
mean value: 0.9596666666666667
|
|
|
|
key: train_recall
|
|
value: [0.97747748 0.97747748 0.98198198 0.97747748 0.96846847 0.97747748
|
|
0.97747748 0.97757848 0.96860987 0.98654709]
|
|
|
|
mean value: 0.9770573263846807
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 0.84615385 0.84461538 0.75333333 0.95833333 0.83333333
|
|
0.91666667 0.7275641 0.88461538 0.84615385]
|
|
|
|
mean value: 0.8570769230769231
|
|
|
|
key: train_roc_auc
|
|
value: [0.89139361 0.90909272 0.90692019 0.89663348 0.88335704 0.90101944
|
|
0.90101944 0.90471844 0.90465892 0.90920275]
|
|
|
|
mean value: 0.90080160217697
|
|
|
|
key: test_jcc
|
|
value: [0.92 0.86206897 0.82142857 0.72413793 0.96153846 0.86206897
|
|
0.92592593 0.73333333 0.88888889 0.85714286]
|
|
|
|
mean value: 0.8556533900327004
|
|
|
|
key: train_jcc
|
|
value: [0.88934426 0.90416667 0.90456432 0.89300412 0.87755102 0.89669421
|
|
0.89669421 0.90082645 0.89626556 0.90909091]
|
|
|
|
mean value: 0.8968201725238889
|
|
|
|
MCC on Blind test: 0.81
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.89936996 0.76085305 0.77836537 0.94507694 1.11818767 0.92478609
|
|
0.75766754 0.80714226 0.88604689 0.76442361]
|
|
|
|
mean value: 0.8641919374465943
|
|
|
|
key: score_time
|
|
value: [0.01350546 0.01369262 0.01547456 0.01348424 0.01345706 0.01348734
|
|
0.01350856 0.01594925 0.01427746 0.01342702]
|
|
|
|
mean value: 0.014026355743408204
|
|
|
|
key: test_mcc
|
|
value: [0.89293523 0.82874193 0.88307692 0.7689699 0.87666667 0.87841046
|
|
0.93883452 0.70986749 0.88141026 0.88377887]
|
|
|
|
mean value: 0.8542692243292472
|
|
|
|
key: train_mcc
|
|
value: [1. 0.99333067 0.99333067 1. 0.99336957 0.98675438
|
|
0.98675438 1. 1. 0.99334066]
|
|
|
|
mean value: 0.9946880333603559
|
|
|
|
key: test_accuracy
|
|
value: [0.94736842 0.92105263 0.94736842 0.89189189 0.94594595 0.94594595
|
|
0.97297297 0.86486486 0.94594595 0.94594595]
|
|
|
|
mean value: 0.9329302987197724
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.99701493 0.99701493 1. 0.99702381 0.99404762
|
|
0.99404762 1. 1. 0.99702381]
|
|
|
|
mean value: 0.9976172707889126
|
|
|
|
key: test_fscore
|
|
value: [0.95833333 0.94339623 0.96 0.91666667 0.96 0.96153846
|
|
0.98039216 0.89361702 0.95833333 0.96 ]
|
|
|
|
mean value: 0.949227719942623
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99775281 0.99775281 1. 0.99775281 0.9955157
|
|
0.9955157 1. 1. 0.99776286]
|
|
|
|
mean value: 0.9982052680635497
|
|
|
|
key: test_precision
|
|
value: [1. 0.89285714 0.96 0.95652174 0.96 0.92592593
|
|
0.96153846 0.91304348 0.95833333 0.92307692]
|
|
|
|
mean value: 0.9451297004123091
|
|
|
|
key: train_precision
|
|
value: [1. 0.9955157 0.9955157 1. 0.9955157 0.99107143
|
|
0.99107143 1. 1. 0.99553571]
|
|
|
|
mean value: 0.9964225656630366
|
|
|
|
key: test_recall
|
|
value: [0.92 1. 0.96 0.88 0.96 1.
|
|
1. 0.875 0.95833333 1. ]
|
|
|
|
mean value: 0.9553333333333334
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 0.88461538 0.94153846 0.89833333 0.93833333 0.91666667
|
|
0.95833333 0.86057692 0.94070513 0.92307692]
|
|
|
|
mean value: 0.9222179487179487
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99557522 0.99557522 1. 0.99561404 0.99122807
|
|
0.99122807 1. 1. 0.99557522]
|
|
|
|
mean value: 0.996479583915541
|
|
|
|
key: test_jcc
|
|
value: [0.92 0.89285714 0.92307692 0.84615385 0.92307692 0.92592593
|
|
0.96153846 0.80769231 0.92 0.92307692]
|
|
|
|
mean value: 0.9043398453398453
|
|
|
|
key: train_jcc
|
|
value: [1. 0.9955157 0.9955157 1. 0.9955157 0.99107143
|
|
0.99107143 1. 1. 0.99553571]
|
|
|
|
mean value: 0.9964225656630366
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01377988 0.01234794 0.00960684 0.00934958 0.0093317 0.00938678
|
|
0.00970578 0.00953078 0.00943804 0.00936127]
|
|
|
|
mean value: 0.01018385887145996
|
|
|
|
key: score_time
|
|
value: [0.01207232 0.0093112 0.00888681 0.0087111 0.00873423 0.00869703
|
|
0.00866437 0.00867462 0.00866818 0.00864863]
|
|
|
|
mean value: 0.009106850624084473
|
|
|
|
key: test_mcc
|
|
value: [0.55248501 0.53230769 0.32486684 0.48626592 0.8125186 0.54918169
|
|
0.68620269 0.6383744 0.47639996 0.45760432]
|
|
|
|
mean value: 0.5516207114542601
|
|
|
|
key: train_mcc
|
|
value: [0.60955858 0.57267002 0.67693867 0.68006045 0.56385632 0.66672593
|
|
0.6030781 0.71157668 0.63846453 0.61505282]
|
|
|
|
mean value: 0.6337982098825211
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.78947368 0.71052632 0.75675676 0.91891892 0.81081081
|
|
0.86486486 0.83783784 0.75675676 0.75675676]
|
|
|
|
mean value: 0.7992176386913229
|
|
|
|
key: train_accuracy
|
|
value: [0.82686567 0.80895522 0.85671642 0.85714286 0.78571429 0.85119048
|
|
0.82440476 0.87202381 0.83928571 0.82738095]
|
|
|
|
mean value: 0.8349680170575693
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.84 0.79245283 0.80851064 0.94117647 0.86792453
|
|
0.90196078 0.88461538 0.80851064 0.81632653]
|
|
|
|
mean value: 0.8494811138549234
|
|
|
|
key: train_fscore
|
|
value: [0.87053571 0.85585586 0.89285714 0.89237668 0.82524272 0.88789238
|
|
0.8691796 0.90423163 0.87946429 0.86936937]
|
|
|
|
mean value: 0.8747005371547041
|
|
|
|
key: test_precision
|
|
value: [0.86956522 0.84 0.75 0.86363636 0.92307692 0.82142857
|
|
0.88461538 0.82142857 0.82608696 0.8 ]
|
|
|
|
mean value: 0.8399837988098857
|
|
|
|
key: train_precision
|
|
value: [0.86283186 0.85585586 0.88495575 0.88839286 0.89473684 0.88392857
|
|
0.8558952 0.89823009 0.87555556 0.87330317]
|
|
|
|
mean value: 0.8773685745130512
|
|
|
|
key: test_recall
|
|
value: [0.8 0.84 0.84 0.76 0.96 0.92
|
|
0.92 0.95833333 0.79166667 0.83333333]
|
|
|
|
mean value: 0.8623333333333334
|
|
|
|
key: train_recall
|
|
value: [0.87837838 0.85585586 0.9009009 0.8963964 0.76576577 0.89189189
|
|
0.88288288 0.9103139 0.88340807 0.86547085]
|
|
|
|
mean value: 0.873126489718418
|
|
|
|
key: test_roc_auc
|
|
value: [0.78461538 0.76615385 0.65076923 0.755 0.89666667 0.75166667
|
|
0.835 0.78685897 0.74198718 0.72435897]
|
|
|
|
mean value: 0.7693076923076922
|
|
|
|
key: train_roc_auc
|
|
value: [0.80202105 0.78633501 0.8354062 0.83854908 0.79516358 0.83191086
|
|
0.7967046 0.85338704 0.81781023 0.80884162]
|
|
|
|
mean value: 0.816612926585649
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.72413793 0.65625 0.67857143 0.88888889 0.76666667
|
|
0.82142857 0.79310345 0.67857143 0.68965517]
|
|
|
|
mean value: 0.7411559250136837
|
|
|
|
key: train_jcc
|
|
value: [0.77075099 0.7480315 0.80645161 0.80566802 0.70247934 0.7983871
|
|
0.76862745 0.82520325 0.78486056 0.7689243 ]
|
|
|
|
mean value: 0.7779384112490693
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01094198 0.01007462 0.00989532 0.00960779 0.0097847 0.00982642
|
|
0.00966573 0.00970364 0.00974774 0.0096848 ]
|
|
|
|
mean value: 0.009893274307250977
|
|
|
|
key: score_time
|
|
value: [0.00967121 0.00875974 0.00879407 0.0088563 0.0089066 0.00944901
|
|
0.00882554 0.00880575 0.00881219 0.00875711]
|
|
|
|
mean value: 0.008963751792907714
|
|
|
|
key: test_mcc
|
|
value: [0.82916172 0.48418203 0.57680438 0.55988677 0.61840096 0.61840096
|
|
0.55988677 0.16354278 0.51219003 0.69941809]
|
|
|
|
mean value: 0.5621874492561967
|
|
|
|
key: train_mcc
|
|
value: [0.63762292 0.64371935 0.66852645 0.65437497 0.64566458 0.66878177
|
|
0.66707378 0.67529593 0.63811234 0.66643709]
|
|
|
|
mean value: 0.6565609170305504
|
|
|
|
key: test_accuracy
|
|
value: [0.92105263 0.76315789 0.81578947 0.81081081 0.83783784 0.83783784
|
|
0.81081081 0.64864865 0.78378378 0.86486486]
|
|
|
|
mean value: 0.8094594594594595
|
|
|
|
key: train_accuracy
|
|
value: [0.84179104 0.84477612 0.85373134 0.84821429 0.8452381 0.85416667
|
|
0.85416667 0.85714286 0.8422619 0.85416667]
|
|
|
|
mean value: 0.8495655650319829
|
|
|
|
key: test_fscore
|
|
value: [0.93877551 0.81632653 0.86792453 0.8627451 0.88461538 0.88461538
|
|
0.8627451 0.75471698 0.84 0.90196078]
|
|
|
|
mean value: 0.8614425299873215
|
|
|
|
key: train_fscore
|
|
value: [0.88453159 0.88744589 0.89135255 0.88840263 0.88793103 0.89230769
|
|
0.89370933 0.89427313 0.88503254 0.89324619]
|
|
|
|
mean value: 0.8898232560986886
|
|
|
|
key: test_precision
|
|
value: [0.95833333 0.83333333 0.82142857 0.84615385 0.85185185 0.85185185
|
|
0.84615385 0.68965517 0.80769231 0.85185185]
|
|
|
|
mean value: 0.8358305966064586
|
|
|
|
key: train_precision
|
|
value: [0.85654008 0.85416667 0.87772926 0.86382979 0.85123967 0.87124464
|
|
0.86192469 0.87878788 0.85714286 0.86864407]
|
|
|
|
mean value: 0.8641249590465252
|
|
|
|
key: test_recall
|
|
value: [0.92 0.8 0.92 0.88 0.92 0.92
|
|
0.88 0.83333333 0.875 0.95833333]
|
|
|
|
mean value: 0.8906666666666667
|
|
|
|
key: train_recall
|
|
value: [0.91441441 0.92342342 0.90540541 0.91441441 0.92792793 0.91441441
|
|
0.92792793 0.9103139 0.91479821 0.91928251]
|
|
|
|
mean value: 0.9172322546762008
|
|
|
|
key: test_roc_auc
|
|
value: [0.92153846 0.74615385 0.76769231 0.77333333 0.79333333 0.79333333
|
|
0.77333333 0.57051282 0.74519231 0.82532051]
|
|
|
|
mean value: 0.7709743589743591
|
|
|
|
key: train_roc_auc
|
|
value: [0.80676473 0.80684446 0.8288089 0.81685633 0.80606923 0.82562826
|
|
0.81922712 0.83126315 0.80695663 0.82247311]
|
|
|
|
mean value: 0.8170891905270462
|
|
|
|
key: test_jcc
|
|
value: [0.88461538 0.68965517 0.76666667 0.75862069 0.79310345 0.79310345
|
|
0.75862069 0.60606061 0.72413793 0.82142857]
|
|
|
|
mean value: 0.7596012608081574
|
|
|
|
key: train_jcc
|
|
value: [0.79296875 0.79766537 0.804 0.7992126 0.79844961 0.80555556
|
|
0.80784314 0.80876494 0.79377432 0.80708661]
|
|
|
|
mean value: 0.8015320896766981
|
|
|
|
MCC on Blind test: 0.51
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00925493 0.01092601 0.01035976 0.01055908 0.01019859 0.01036501
|
|
0.01051664 0.0091784 0.00894523 0.01044321]
|
|
|
|
mean value: 0.010074687004089356
|
|
|
|
key: score_time
|
|
value: [0.06930518 0.0128603 0.01213789 0.01573467 0.01184487 0.0118773
|
|
0.01323462 0.01132178 0.01328206 0.01180363]
|
|
|
|
mean value: 0.018340229988098145
|
|
|
|
key: test_mcc
|
|
value: [0.51821022 0.39589973 0.44835883 0.41461399 0.33735363 0.61784991
|
|
0.50666667 0.37148459 0.38832708 0.51219003]
|
|
|
|
mean value: 0.45109546666491884
|
|
|
|
key: train_mcc
|
|
value: [0.60048177 0.62148576 0.66474254 0.67796202 0.61681097 0.61696337
|
|
0.67581383 0.62911205 0.63622392 0.60888417]
|
|
|
|
mean value: 0.6348480401883072
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.73684211 0.76315789 0.75675676 0.72972973 0.83783784
|
|
0.78378378 0.72972973 0.72972973 0.78378378]
|
|
|
|
mean value: 0.7640825035561878
|
|
|
|
key: train_accuracy
|
|
value: [0.82686567 0.8358209 0.85373134 0.85714286 0.83333333 0.83333333
|
|
0.85714286 0.83928571 0.8422619 0.83035714]
|
|
|
|
mean value: 0.8409275053304904
|
|
|
|
key: test_fscore
|
|
value: [0.84615385 0.80769231 0.84210526 0.83018868 0.81481481 0.88888889
|
|
0.84 0.82142857 0.8 0.84 ]
|
|
|
|
mean value: 0.8331272371381606
|
|
|
|
key: train_fscore
|
|
value: [0.87553648 0.88372093 0.89596603 0.9 0.88085106 0.88185654
|
|
0.89915966 0.88607595 0.88747346 0.87741935]
|
|
|
|
mean value: 0.8868059473350633
|
|
|
|
key: test_precision
|
|
value: [0.81481481 0.77777778 0.75 0.78571429 0.75862069 0.82758621
|
|
0.84 0.71875 0.76923077 0.80769231]
|
|
|
|
mean value: 0.785018685178168
|
|
|
|
key: train_precision
|
|
value: [0.83606557 0.83266932 0.84738956 0.8372093 0.83467742 0.82936508
|
|
0.84251969 0.83665339 0.84274194 0.84297521]
|
|
|
|
mean value: 0.8382266469347082
|
|
|
|
key: test_recall
|
|
value: [0.88 0.84 0.96 0.88 0.88 0.96
|
|
0.84 0.95833333 0.83333333 0.875 ]
|
|
|
|
mean value: 0.8906666666666667
|
|
|
|
key: train_recall
|
|
value: [0.91891892 0.94144144 0.95045045 0.97297297 0.93243243 0.94144144
|
|
0.96396396 0.94170404 0.93721973 0.91479821]
|
|
|
|
mean value: 0.9415343594715793
|
|
|
|
key: test_roc_auc
|
|
value: [0.74769231 0.68923077 0.67230769 0.69 0.64833333 0.77166667
|
|
0.75333333 0.63301282 0.68589744 0.74519231]
|
|
|
|
mean value: 0.7036666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.78246831 0.78488001 0.80708363 0.80227596 0.78639165 0.78212423
|
|
0.80654339 0.78943609 0.79604349 0.78925751]
|
|
|
|
mean value: 0.7926504276786556
|
|
|
|
key: test_jcc
|
|
value: [0.73333333 0.67741935 0.72727273 0.70967742 0.6875 0.8
|
|
0.72413793 0.6969697 0.66666667 0.72413793]
|
|
|
|
mean value: 0.7147115060504938
|
|
|
|
key: train_jcc
|
|
value: [0.77862595 0.79166667 0.81153846 0.81818182 0.78707224 0.78867925
|
|
0.81679389 0.79545455 0.79770992 0.7816092 ]
|
|
|
|
mean value: 0.7967331946865184
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01879001 0.01627064 0.01751494 0.01576185 0.01556849 0.01573348
|
|
0.01594043 0.01525545 0.01536846 0.01547551]
|
|
|
|
mean value: 0.016167926788330077
|
|
|
|
key: score_time
|
|
value: [0.01143479 0.01122189 0.01125312 0.01110315 0.01058292 0.01032877
|
|
0.01039791 0.01021576 0.01015782 0.01008463]
|
|
|
|
mean value: 0.01067807674407959
|
|
|
|
key: test_mcc
|
|
value: [0.70344321 0.7725393 0.70277316 0.40239363 0.54980516 0.69721669
|
|
0.69721669 0.20588303 0.57193794 0.77032889]
|
|
|
|
mean value: 0.6073537694194354
|
|
|
|
key: train_mcc
|
|
value: [0.71800383 0.65964628 0.73571229 0.70508341 0.6928506 0.69726159
|
|
0.71183668 0.71018601 0.69781121 0.69656179]
|
|
|
|
mean value: 0.7024953685973351
|
|
|
|
key: test_accuracy
|
|
value: [0.86842105 0.89473684 0.86842105 0.75675676 0.81081081 0.86486486
|
|
0.86486486 0.67567568 0.81081081 0.89189189]
|
|
|
|
mean value: 0.8307254623044097
|
|
|
|
key: train_accuracy
|
|
value: [0.87164179 0.85074627 0.88059701 0.86904762 0.86309524 0.86607143
|
|
0.87202381 0.87202381 0.86607143 0.86607143]
|
|
|
|
mean value: 0.8677389836531628
|
|
|
|
key: test_fscore
|
|
value: [0.90196078 0.92592593 0.90566038 0.83636364 0.87272727 0.90909091
|
|
0.90909091 0.78571429 0.8627451 0.92307692]
|
|
|
|
mean value: 0.8832356121701294
|
|
|
|
key: train_fscore
|
|
value: [0.91134021 0.89539749 0.91666667 0.90756303 0.90416667 0.90526316
|
|
0.90947368 0.90985325 0.90644491 0.90605428]
|
|
|
|
mean value: 0.9072223332044272
|
|
|
|
key: test_precision
|
|
value: [0.88461538 0.86206897 0.85714286 0.76666667 0.8 0.83333333
|
|
0.83333333 0.6875 0.81481481 0.85714286]
|
|
|
|
mean value: 0.8196618212566489
|
|
|
|
key: train_precision
|
|
value: [0.84030418 0.8359375 0.85271318 0.8503937 0.84108527 0.84980237
|
|
0.85375494 0.85433071 0.84496124 0.84765625]
|
|
|
|
mean value: 0.847093934413377
|
|
|
|
key: test_recall
|
|
value: [0.92 1. 0.96 0.92 0.96 1.
|
|
1. 0.91666667 0.91666667 1. ]
|
|
|
|
mean value: 0.9593333333333334
|
|
|
|
key: train_recall
|
|
value: [0.9954955 0.96396396 0.99099099 0.97297297 0.97747748 0.96846847
|
|
0.97297297 0.97309417 0.97757848 0.97309417]
|
|
|
|
mean value: 0.9766109158485841
|
|
|
|
key: test_roc_auc
|
|
value: [0.84461538 0.84615385 0.82615385 0.66833333 0.73 0.79166667
|
|
0.79166667 0.57371795 0.76602564 0.84615385]
|
|
|
|
mean value: 0.7684487179487179
|
|
|
|
key: train_roc_auc
|
|
value: [0.81190704 0.79614127 0.8273539 0.81981982 0.80891418 0.81756757
|
|
0.82420578 0.82283027 0.81179809 0.81398071]
|
|
|
|
mean value: 0.8154518637618948
|
|
|
|
key: test_jcc
|
|
value: [0.82142857 0.86206897 0.82758621 0.71875 0.77419355 0.83333333
|
|
0.83333333 0.64705882 0.75862069 0.85714286]
|
|
|
|
mean value: 0.7933516329223569
|
|
|
|
key: train_jcc
|
|
value: [0.83712121 0.81060606 0.84615385 0.83076923 0.82509506 0.82692308
|
|
0.83397683 0.83461538 0.82889734 0.82824427]
|
|
|
|
mean value: 0.8302402315412067
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.76848578 1.40973759 1.23187423 1.3841064 1.24124241 1.39055371
|
|
1.31246519 1.24122453 1.43185449 1.23146605]
|
|
|
|
mean value: 1.364301037788391
|
|
|
|
key: score_time
|
|
value: [0.01309896 0.01346326 0.01350737 0.01622629 0.01507068 0.01988578
|
|
0.01482821 0.01400661 0.01419473 0.01402307]
|
|
|
|
mean value: 0.014830493927001953
|
|
|
|
key: test_mcc
|
|
value: [0.89293523 0.7725393 0.88307692 0.72145336 0.93883452 0.75809804
|
|
0.64992435 0.64423077 0.64423077 0.77032889]
|
|
|
|
mean value: 0.7675652156307097
|
|
|
|
key: train_mcc
|
|
value: [0.99333067 1. 0.99333067 1. 1. 0.99336957
|
|
1. 1. 1. 0.99334066]
|
|
|
|
mean value: 0.9973371569471698
|
|
|
|
key: test_accuracy
|
|
value: [0.94736842 0.89473684 0.94736842 0.86486486 0.97297297 0.89189189
|
|
0.83783784 0.83783784 0.83783784 0.89189189]
|
|
|
|
mean value: 0.8924608819345662
|
|
|
|
key: train_accuracy
|
|
value: [0.99701493 1. 0.99701493 1. 1. 0.99702381
|
|
1. 1. 1. 0.99702381]
|
|
|
|
mean value: 0.9988077469793888
|
|
|
|
key: test_fscore
|
|
value: [0.95833333 0.92592593 0.96 0.89361702 0.98039216 0.92592593
|
|
0.875 0.875 0.875 0.92307692]
|
|
|
|
mean value: 0.9192271286401449
|
|
|
|
key: train_fscore
|
|
value: [0.99775281 1. 0.99775281 1. 1. 0.99775281
|
|
1. 1. 1. 0.99776286]
|
|
|
|
mean value: 0.9991021290500968
|
|
|
|
key: test_precision
|
|
value: [1. 0.86206897 0.96 0.95454545 0.96153846 0.86206897
|
|
0.91304348 0.875 0.875 0.85714286]
|
|
|
|
mean value: 0.9120408182522125
|
|
|
|
key: train_precision
|
|
value: [0.9955157 1. 0.9955157 1. 1. 0.9955157
|
|
1. 1. 1. 0.99553571]
|
|
|
|
mean value: 0.9982082799487508
|
|
|
|
key: test_recall
|
|
value: [0.92 1. 0.96 0.84 1. 1. 0.84 0.875 0.875 1. ]
|
|
|
|
mean value: 0.931
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 0.84615385 0.94153846 0.87833333 0.95833333 0.83333333
|
|
0.83666667 0.82211538 0.82211538 0.84615385]
|
|
|
|
mean value: 0.8744743589743589
|
|
|
|
key: train_roc_auc
|
|
value: [0.99557522 1. 0.99557522 1. 1. 0.99561404
|
|
1. 1. 1. 0.99557522]
|
|
|
|
mean value: 0.9982339698804533
|
|
|
|
key: test_jcc
|
|
value: [0.92 0.86206897 0.92307692 0.80769231 0.96153846 0.86206897
|
|
0.77777778 0.77777778 0.77777778 0.85714286]
|
|
|
|
mean value: 0.8526921813818366
|
|
|
|
key: train_jcc
|
|
value: [0.9955157 1. 0.9955157 1. 1. 0.9955157
|
|
1. 1. 1. 0.99553571]
|
|
|
|
mean value: 0.9982082799487508
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0243485 0.0164957 0.01648617 0.01559806 0.0184691 0.01547146
|
|
0.01629901 0.01876116 0.01333284 0.01748848]
|
|
|
|
mean value: 0.017275047302246094
|
|
|
|
key: score_time
|
|
value: [0.0123992 0.00903821 0.009094 0.00851703 0.00876546 0.00856733
|
|
0.00864244 0.00863147 0.00858784 0.00860715]
|
|
|
|
mean value: 0.00908501148223877
|
|
|
|
key: test_mcc
|
|
value: [0.94415495 0.94211144 1. 0.87666667 0.7689699 0.87666667
|
|
0.82041917 0.94135745 0.94135745 0.72889516]
|
|
|
|
mean value: 0.8840598843111614
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97368421 0.97368421 1. 0.94594595 0.89189189 0.94594595
|
|
0.91891892 0.97297297 0.97297297 0.86486486]
|
|
|
|
mean value: 0.9460881934566145
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 0.98039216 1. 0.96 0.91666667 0.96
|
|
0.93877551 0.97959184 0.97959184 0.88888889]
|
|
|
|
mean value: 0.9583498732826464
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.96153846 1. 0.96 0.95652174 0.96
|
|
0.95833333 0.96 0.96 0.95238095]
|
|
|
|
mean value: 0.9668774486383181
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 1. 0.96 0.88 0.96
|
|
0.92 1. 1. 0.83333333]
|
|
|
|
mean value: 0.9513333333333334
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.96153846 1. 0.93833333 0.89833333 0.93833333
|
|
0.91833333 0.96153846 0.96153846 0.87820513]
|
|
|
|
mean value: 0.9436153846153846
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96 0.96153846 1. 0.92307692 0.84615385 0.92307692
|
|
0.88461538 0.96 0.96 0.8 ]
|
|
|
|
mean value: 0.9218461538461539
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.86
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.11332464 0.10723877 0.10702634 0.10776043 0.10799861 0.10840106
|
|
0.10843563 0.10721397 0.10700917 0.10656667]
|
|
|
|
mean value: 0.10809752941131592
|
|
|
|
key: score_time
|
|
value: [0.01731491 0.01727319 0.01736617 0.01727343 0.01780224 0.01718664
|
|
0.01729918 0.01744843 0.01738 0.01732063]
|
|
|
|
mean value: 0.017366480827331544
|
|
|
|
key: test_mcc
|
|
value: [0.82277732 0.76283119 0.6405207 0.63 0.69948088 0.8125186
|
|
0.87841046 0.59776258 0.63605297 0.77032889]
|
|
|
|
mean value: 0.7250683598649889
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92105263 0.89473684 0.84210526 0.83783784 0.86486486 0.91891892
|
|
0.94594595 0.81081081 0.83783784 0.89189189]
|
|
|
|
mean value: 0.8766002844950214
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.94117647 0.92307692 0.88461538 0.88 0.89795918 0.94117647
|
|
0.96153846 0.87272727 0.88 0.92307692]
|
|
|
|
mean value: 0.9105347089884904
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.92307692 0.88888889 0.85185185 0.88 0.91666667 0.92307692
|
|
0.92592593 0.77419355 0.84615385 0.85714286]
|
|
|
|
mean value: 0.8786977431170979
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96 0.96 0.92 0.88 0.88 0.96
|
|
1. 1. 0.91666667 1. ]
|
|
|
|
mean value: 0.9476666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.90307692 0.86461538 0.80615385 0.815 0.85666667 0.89666667
|
|
0.91666667 0.73076923 0.80448718 0.84615385]
|
|
|
|
mean value: 0.844025641025641
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.88888889 0.85714286 0.79310345 0.78571429 0.81481481 0.88888889
|
|
0.92592593 0.77419355 0.78571429 0.85714286]
|
|
|
|
mean value: 0.8371529800895763
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00964475 0.00963831 0.00968194 0.009691 0.00953698 0.00975823
|
|
0.009624 0.00955296 0.00963974 0.00962472]
|
|
|
|
mean value: 0.009639263153076172
|
|
|
|
key: score_time
|
|
value: [0.0086658 0.0086453 0.00860357 0.00866318 0.00879598 0.00859356
|
|
0.00848913 0.00863934 0.00864697 0.00851822]
|
|
|
|
mean value: 0.008626103401184082
|
|
|
|
key: test_mcc
|
|
value: [0.36918879 0.19889806 0.27358924 0.21572775 0.18093902 0.25107848
|
|
0.13666667 0.07813454 0.47639996 0.52564103]
|
|
|
|
mean value: 0.2706263538751131
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.71052632 0.65789474 0.68421053 0.64864865 0.64864865 0.64864865
|
|
0.62162162 0.62162162 0.75675676 0.78378378]
|
|
|
|
mean value: 0.6782361308677098
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.7755102 0.75471698 0.76923077 0.73469388 0.74509804 0.72340426
|
|
0.72 0.74074074 0.80851064 0.83333333]
|
|
|
|
mean value: 0.760523883890228
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.79166667 0.71428571 0.74074074 0.75 0.73076923 0.77272727
|
|
0.72 0.66666667 0.82608696 0.83333333]
|
|
|
|
mean value: 0.7546276581711364
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.76 0.8 0.8 0.72 0.76 0.68
|
|
0.72 0.83333333 0.79166667 0.83333333]
|
|
|
|
mean value: 0.7698333333333334
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.68769231 0.59230769 0.63076923 0.61 0.58833333 0.63166667
|
|
0.56833333 0.53205128 0.74198718 0.76282051]
|
|
|
|
mean value: 0.6345961538461539
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.63333333 0.60606061 0.625 0.58064516 0.59375 0.56666667
|
|
0.5625 0.58823529 0.67857143 0.71428571]
|
|
|
|
mean value: 0.6149048204325719
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.51678085 1.51271772 1.50963163 1.51490927 1.51578307 1.51690912
|
|
1.51508307 1.50265121 1.51495957 1.50335526]
|
|
|
|
mean value: 1.5122780799865723
|
|
|
|
key: score_time
|
|
value: [0.08868313 0.08869243 0.0909524 0.09587669 0.08834338 0.08860993
|
|
0.08987284 0.08919668 0.08966446 0.08873558]
|
|
|
|
mean value: 0.0898627519607544
|
|
|
|
key: test_mcc
|
|
value: [0.94415495 0.88514167 0.88307692 0.93883452 0.87841046 0.87841046
|
|
0.87841046 0.77032889 0.88377887 0.82689823]
|
|
|
|
mean value: 0.8767445437814599
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97368421 0.94736842 0.94736842 0.97297297 0.94594595 0.94594595
|
|
0.94594595 0.89189189 0.94594595 0.91891892]
|
|
|
|
mean value: 0.9435988620199146
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 0.96153846 0.96 0.98039216 0.96153846 0.96153846
|
|
0.96153846 0.92307692 0.96 0.94117647]
|
|
|
|
mean value: 0.9590391233416443
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.92592593 0.96 0.96153846 0.92592593 0.92592593
|
|
0.92592593 0.85714286 0.92307692 0.88888889]
|
|
|
|
mean value: 0.9294350834350834
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 0.96 1. 1. 1. 1. 1. 1. 1. ]
|
|
|
|
mean value: 0.992
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.92307692 0.94153846 0.95833333 0.91666667 0.91666667
|
|
0.91666667 0.84615385 0.92307692 0.88461538]
|
|
|
|
mean value: 0.9206794871794872
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96 0.92592593 0.92307692 0.96153846 0.92592593 0.92592593
|
|
0.92592593 0.85714286 0.92307692 0.88888889]
|
|
|
|
mean value: 0.9217427757427757
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.95
|
|
|
|
Accuracy on Blind test: 0.98
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
key: fit_time
|
|
value: [1.7981739 0.90559983 0.92962551 0.94709492 0.92527056 0.92396808
|
|
0.92568278 0.91854715 0.97762322 0.9265461 ]
|
|
|
|
mean value: 1.0178132057189941
|
|
|
|
key: score_time
|
|
value: [0.15995264 0.25659728 0.31224418 0.21547484 0.25620723 0.23124361
|
|
0.2417984 0.25423598 0.22519088 0.24117589]
|
|
|
|
mean value: 0.23941209316253662
|
|
|
|
key: test_mcc
|
|
value: [0.88307692 0.82874193 0.76283119 0.74840571 0.87841046 0.87841046
|
|
0.87841046 0.77032889 0.88377887 0.82689823]
|
|
|
|
mean value: 0.8339293123779081
|
|
|
|
key: train_mcc
|
|
value: [0.9208524 0.94047649 0.93392475 0.94083339 0.93432235 0.94735571
|
|
0.94083339 0.94055866 0.94711316 0.94711316]
|
|
|
|
mean value: 0.9393383453250792
|
|
|
|
key: test_accuracy
|
|
value: [0.94736842 0.92105263 0.89473684 0.89189189 0.94594595 0.94594595
|
|
0.94594595 0.89189189 0.94594595 0.91891892]
|
|
|
|
mean value: 0.9249644381223329
|
|
|
|
key: train_accuracy
|
|
value: [0.9641791 0.97313433 0.97014925 0.97321429 0.9702381 0.97619048
|
|
0.97321429 0.97321429 0.97619048 0.97619048]
|
|
|
|
mean value: 0.9725915067519545
|
|
|
|
key: test_fscore
|
|
value: [0.96 0.94339623 0.92307692 0.92307692 0.96153846 0.96153846
|
|
0.96153846 0.92307692 0.96 0.94117647]
|
|
|
|
mean value: 0.9458418850849484
|
|
|
|
key: train_fscore
|
|
value: [0.97368421 0.98013245 0.97797357 0.98013245 0.97797357 0.98230088
|
|
0.98013245 0.98021978 0.98237885 0.98237885]
|
|
|
|
mean value: 0.9797307072510203
|
|
|
|
key: test_precision
|
|
value: [0.96 0.89285714 0.88888889 0.88888889 0.92592593 0.92592593
|
|
0.92592593 0.85714286 0.92307692 0.88888889]
|
|
|
|
mean value: 0.9077521367521367
|
|
|
|
key: train_precision
|
|
value: [0.94871795 0.96103896 0.95689655 0.96103896 0.95689655 0.96521739
|
|
0.96103896 0.9612069 0.96536797 0.96536797]
|
|
|
|
mean value: 0.960278815387511
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 0.96 0.96 1. 1. 1. 1. 1. 1. ]
|
|
|
|
mean value: 0.988
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.94153846 0.88461538 0.86461538 0.855 0.91666667 0.91666667
|
|
0.91666667 0.84615385 0.92307692 0.88461538]
|
|
|
|
mean value: 0.8949615384615385
|
|
|
|
key: train_roc_auc
|
|
value: [0.94690265 0.96017699 0.95575221 0.96052632 0.95614035 0.96491228
|
|
0.96052632 0.96017699 0.96460177 0.96460177]
|
|
|
|
mean value: 0.9594317652538426
|
|
|
|
key: test_jcc
|
|
value: [0.92307692 0.89285714 0.85714286 0.85714286 0.92592593 0.92592593
|
|
0.92592593 0.85714286 0.92307692 0.88888889]
|
|
|
|
mean value: 0.8977106227106227
|
|
|
|
key: train_jcc
|
|
value: [0.94871795 0.96103896 0.95689655 0.96103896 0.95689655 0.96521739
|
|
0.96103896 0.9612069 0.96536797 0.96536797]
|
|
|
|
mean value: 0.960278815387511
|
|
|
|
MCC on Blind test: 0.93
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02384973 0.00983047 0.00995731 0.00989604 0.00971317 0.00980139
|
|
0.00994396 0.01081944 0.01022983 0.00976181]
|
|
|
|
mean value: 0.011380314826965332
|
|
|
|
key: score_time
|
|
value: [0.01449561 0.00887012 0.00886655 0.00883055 0.00878501 0.00887728
|
|
0.00960946 0.00903845 0.0088439 0.0090878 ]
|
|
|
|
mean value: 0.009530472755432128
|
|
|
|
key: test_mcc
|
|
value: [0.82916172 0.48418203 0.57680438 0.55988677 0.61840096 0.61840096
|
|
0.55988677 0.16354278 0.51219003 0.69941809]
|
|
|
|
mean value: 0.5621874492561967
|
|
|
|
key: train_mcc
|
|
value: [0.63762292 0.64371935 0.66852645 0.65437497 0.64566458 0.66878177
|
|
0.66707378 0.67529593 0.63811234 0.66643709]
|
|
|
|
mean value: 0.6565609170305504
|
|
|
|
key: test_accuracy
|
|
value: [0.92105263 0.76315789 0.81578947 0.81081081 0.83783784 0.83783784
|
|
0.81081081 0.64864865 0.78378378 0.86486486]
|
|
|
|
mean value: 0.8094594594594595
|
|
|
|
key: train_accuracy
|
|
value: [0.84179104 0.84477612 0.85373134 0.84821429 0.8452381 0.85416667
|
|
0.85416667 0.85714286 0.8422619 0.85416667]
|
|
|
|
mean value: 0.8495655650319829
|
|
|
|
key: test_fscore
|
|
value: [0.93877551 0.81632653 0.86792453 0.8627451 0.88461538 0.88461538
|
|
0.8627451 0.75471698 0.84 0.90196078]
|
|
|
|
mean value: 0.8614425299873215
|
|
|
|
key: train_fscore
|
|
value: [0.88453159 0.88744589 0.89135255 0.88840263 0.88793103 0.89230769
|
|
0.89370933 0.89427313 0.88503254 0.89324619]
|
|
|
|
mean value: 0.8898232560986886
|
|
|
|
key: test_precision
|
|
value: [0.95833333 0.83333333 0.82142857 0.84615385 0.85185185 0.85185185
|
|
0.84615385 0.68965517 0.80769231 0.85185185]
|
|
|
|
mean value: 0.8358305966064586
|
|
|
|
key: train_precision
|
|
value: [0.85654008 0.85416667 0.87772926 0.86382979 0.85123967 0.87124464
|
|
0.86192469 0.87878788 0.85714286 0.86864407]
|
|
|
|
mean value: 0.8641249590465252
|
|
|
|
key: test_recall
|
|
value: [0.92 0.8 0.92 0.88 0.92 0.92
|
|
0.88 0.83333333 0.875 0.95833333]
|
|
|
|
mean value: 0.8906666666666667
|
|
|
|
key: train_recall
|
|
value: [0.91441441 0.92342342 0.90540541 0.91441441 0.92792793 0.91441441
|
|
0.92792793 0.9103139 0.91479821 0.91928251]
|
|
|
|
mean value: 0.9172322546762008
|
|
|
|
key: test_roc_auc
|
|
value: [0.92153846 0.74615385 0.76769231 0.77333333 0.79333333 0.79333333
|
|
0.77333333 0.57051282 0.74519231 0.82532051]
|
|
|
|
mean value: 0.7709743589743591
|
|
|
|
key: train_roc_auc
|
|
value: [0.80676473 0.80684446 0.8288089 0.81685633 0.80606923 0.82562826
|
|
0.81922712 0.83126315 0.80695663 0.82247311]
|
|
|
|
mean value: 0.8170891905270462
|
|
|
|
key: test_jcc
|
|
value: [0.88461538 0.68965517 0.76666667 0.75862069 0.79310345 0.79310345
|
|
0.75862069 0.60606061 0.72413793 0.82142857]
|
|
|
|
mean value: 0.7596012608081574
|
|
|
|
key: train_jcc
|
|
value: [0.79296875 0.79766537 0.804 0.7992126 0.79844961 0.80555556
|
|
0.80784314 0.80876494 0.79377432 0.80708661]
|
|
|
|
mean value: 0.8015320896766981
|
|
|
|
MCC on Blind test: 0.51
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.10048103 0.21620488 0.05475497 0.06091928 0.06145883 0.06205487
|
|
0.07969356 0.05818295 0.0811553 0.06088591]
|
|
|
|
mean value: 0.08357915878295899
|
|
|
|
key: score_time
|
|
value: [0.01126313 0.01096892 0.01093197 0.01062346 0.0104332 0.01046801
|
|
0.01205325 0.0109458 0.01278877 0.01082683]
|
|
|
|
mean value: 0.011130332946777344
|
|
|
|
key: test_mcc
|
|
value: [0.94415495 0.94211144 1. 0.87666667 0.87841046 0.87841046
|
|
0.87666667 0.94135745 0.88141026 0.82660125]
|
|
|
|
mean value: 0.9045789602681934
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97368421 0.97368421 1. 0.94594595 0.94594595 0.94594595
|
|
0.94594595 0.97297297 0.94594595 0.91891892]
|
|
|
|
mean value: 0.9568990042674254
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 0.98039216 1. 0.96 0.96153846 0.96153846
|
|
0.96 0.97959184 0.95833333 0.93617021]
|
|
|
|
mean value: 0.9677156299508346
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.96153846 1. 0.96 0.92592593 0.92592593
|
|
0.96 0.96 0.95833333 0.95652174]
|
|
|
|
mean value: 0.9608245385854082
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 1. 0.96 1. 1.
|
|
0.96 1. 0.95833333 0.91666667]
|
|
|
|
mean value: 0.9755
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.96153846 1. 0.93833333 0.91666667 0.91666667
|
|
0.93833333 0.96153846 0.94070513 0.91987179]
|
|
|
|
mean value: 0.9473653846153847
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96 0.96153846 1. 0.92307692 0.92592593 0.92592593
|
|
0.92307692 0.96 0.92 0.88 ]
|
|
|
|
mean value: 0.937954415954416
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04090953 0.03238916 0.05295444 0.05300117 0.03476834 0.05470324
|
|
0.03301048 0.09418178 0.0506978 0.06357265]
|
|
|
|
mean value: 0.05101885795593262
|
|
|
|
key: score_time
|
|
value: [0.01226592 0.01207089 0.01227951 0.02103472 0.01245785 0.01222873
|
|
0.01223779 0.03396845 0.0249002 0.02325702]
|
|
|
|
mean value: 0.017670106887817384
|
|
|
|
key: test_mcc
|
|
value: [0.89293523 0.82874193 0.89293523 0.67718635 0.69948088 0.88801544
|
|
0.74840571 0.82041917 0.82041917 0.88377887]
|
|
|
|
mean value: 0.815231798513477
|
|
|
|
key: train_mcc
|
|
value: [0.9734111 0.96659232 0.9665638 0.97341986 0.97341986 0.96678929
|
|
0.96008534 0.97330264 0.97330264 0.97330264]
|
|
|
|
mean value: 0.9700189503916914
|
|
|
|
key: test_accuracy
|
|
value: [0.94736842 0.92105263 0.94736842 0.83783784 0.86486486 0.94594595
|
|
0.89189189 0.91891892 0.91891892 0.94594595]
|
|
|
|
mean value: 0.9140113798008535
|
|
|
|
key: train_accuracy
|
|
value: [0.9880597 0.98507463 0.98507463 0.98809524 0.98809524 0.98511905
|
|
0.98214286 0.98809524 0.98809524 0.98809524]
|
|
|
|
mean value: 0.9865947050461976
|
|
|
|
key: test_fscore
|
|
value: [0.95833333 0.94339623 0.95833333 0.86956522 0.89795918 0.95833333
|
|
0.92307692 0.93877551 0.93877551 0.96 ]
|
|
|
|
mean value: 0.9346548570964954
|
|
|
|
key: train_fscore
|
|
value: [0.99107143 0.98881432 0.98876404 0.99103139 0.99103139 0.98881432
|
|
0.98654709 0.99107143 0.99107143 0.99107143]
|
|
|
|
mean value: 0.9899288260047142
|
|
|
|
key: test_precision
|
|
value: [1. 0.89285714 1. 0.95238095 0.91666667 1.
|
|
0.88888889 0.92 0.92 0.92307692]
|
|
|
|
mean value: 0.9413870573870574
|
|
|
|
key: train_precision
|
|
value: [0.98230088 0.98222222 0.98654709 0.98660714 0.98660714 0.98222222
|
|
0.98214286 0.98666667 0.98666667 0.98666667]
|
|
|
|
mean value: 0.9848649557459134
|
|
|
|
key: test_recall
|
|
value: [0.92 1. 0.92 0.8 0.88 0.92
|
|
0.96 0.95833333 0.95833333 1. ]
|
|
|
|
mean value: 0.9316666666666666
|
|
|
|
key: train_recall
|
|
value: [1. 0.9954955 0.99099099 0.9954955 0.9954955 0.9954955
|
|
0.99099099 0.9955157 0.9955157 0.9955157 ]
|
|
|
|
mean value: 0.9950511049165758
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 0.88461538 0.96 0.85833333 0.85666667 0.96
|
|
0.855 0.90224359 0.90224359 0.92307692]
|
|
|
|
mean value: 0.9062179487179487
|
|
|
|
key: train_roc_auc
|
|
value: [0.98230088 0.98004863 0.98222116 0.98458985 0.98458985 0.98020389
|
|
0.97795164 0.98448351 0.98448351 0.98448351]
|
|
|
|
mean value: 0.982535644058971
|
|
|
|
key: test_jcc
|
|
value: [0.92 0.89285714 0.92 0.76923077 0.81481481 0.92
|
|
0.85714286 0.88461538 0.88461538 0.92307692]
|
|
|
|
mean value: 0.8786353276353276
|
|
|
|
key: train_jcc
|
|
value: [0.98230088 0.97787611 0.97777778 0.98222222 0.98222222 0.97787611
|
|
0.97345133 0.98230088 0.98230088 0.98230088]
|
|
|
|
mean value: 0.980062930186824
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01893115 0.0106504 0.00960517 0.0100739 0.00992584 0.00969601
|
|
0.01068687 0.00961399 0.0095706 0.01010799]
|
|
|
|
mean value: 0.010886192321777344
|
|
|
|
key: score_time
|
|
value: [0.01709676 0.0091002 0.00870395 0.00866795 0.00969434 0.00951099
|
|
0.00945115 0.00916266 0.0086863 0.00898147]
|
|
|
|
mean value: 0.009905576705932617
|
|
|
|
key: test_mcc
|
|
value: [0.82916172 0.6405207 0.76615385 0.41461399 0.54980516 0.69721669
|
|
0.68374939 0.43859201 0.47639996 0.75991592]
|
|
|
|
mean value: 0.6256129389552048
|
|
|
|
key: train_mcc
|
|
value: [0.64332287 0.63106951 0.66458306 0.68777627 0.65320222 0.65320222
|
|
0.64066565 0.69324635 0.67930174 0.63695182]
|
|
|
|
mean value: 0.658332169194295
|
|
|
|
key: test_accuracy
|
|
value: [0.92105263 0.84210526 0.89473684 0.75675676 0.81081081 0.86486486
|
|
0.86486486 0.75675676 0.75675676 0.89189189]
|
|
|
|
mean value: 0.8360597439544808
|
|
|
|
key: train_accuracy
|
|
value: [0.84477612 0.83880597 0.85373134 0.86309524 0.84821429 0.84821429
|
|
0.8422619 0.86607143 0.86011905 0.8422619 ]
|
|
|
|
mean value: 0.8507551528073917
|
|
|
|
key: test_fscore
|
|
value: [0.93877551 0.88461538 0.92 0.83018868 0.87272727 0.90909091
|
|
0.90566038 0.83018868 0.80851064 0.92 ]
|
|
|
|
mean value: 0.8819757450784577
|
|
|
|
key: train_fscore
|
|
value: [0.88793103 0.88209607 0.89416847 0.9004329 0.88937093 0.88937093
|
|
0.88402626 0.90280778 0.89848812 0.88602151]
|
|
|
|
mean value: 0.8914713996727421
|
|
|
|
key: test_precision
|
|
value: [0.95833333 0.85185185 0.92 0.78571429 0.8 0.83333333
|
|
0.85714286 0.75862069 0.82608696 0.88461538]
|
|
|
|
mean value: 0.8475698692167958
|
|
|
|
key: train_precision
|
|
value: [0.85123967 0.8559322 0.85892116 0.86666667 0.85774059 0.85774059
|
|
0.85957447 0.87083333 0.86666667 0.85123967]
|
|
|
|
mean value: 0.8596555010358422
|
|
|
|
key: test_recall
|
|
value: [0.92 0.92 0.92 0.88 0.96 1.
|
|
0.96 0.91666667 0.79166667 0.95833333]
|
|
|
|
mean value: 0.9226666666666666
|
|
|
|
key: train_recall
|
|
value: [0.92792793 0.90990991 0.93243243 0.93693694 0.92342342 0.92342342
|
|
0.90990991 0.93721973 0.93273543 0.92376682]
|
|
|
|
mean value: 0.9257685937058134
|
|
|
|
key: test_roc_auc
|
|
value: [0.92153846 0.80615385 0.88307692 0.69 0.73 0.79166667
|
|
0.81333333 0.68910256 0.74198718 0.86378205]
|
|
|
|
mean value: 0.7930641025641025
|
|
|
|
key: train_roc_auc
|
|
value: [0.80467193 0.80451248 0.81577374 0.82811759 0.8125889 0.8125889
|
|
0.81021811 0.83144172 0.82477479 0.80259137]
|
|
|
|
mean value: 0.8147279546700159
|
|
|
|
key: test_jcc
|
|
value: [0.88461538 0.79310345 0.85185185 0.70967742 0.77419355 0.83333333
|
|
0.82758621 0.70967742 0.67857143 0.85185185]
|
|
|
|
mean value: 0.7914461892493039
|
|
|
|
key: train_jcc
|
|
value: [0.79844961 0.7890625 0.80859375 0.81889764 0.80078125 0.80078125
|
|
0.79215686 0.82283465 0.81568627 0.7953668 ]
|
|
|
|
mean value: 0.8042610578489365
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01611042 0.01840901 0.01934195 0.01798987 0.02220845 0.02180552
|
|
0.02408409 0.02300167 0.01865768 0.02133894]
|
|
|
|
mean value: 0.020294761657714842
|
|
|
|
key: score_time
|
|
value: [0.00874615 0.01107001 0.01109815 0.01175427 0.01189899 0.01181722
|
|
0.01179576 0.01186514 0.01196933 0.01178503]
|
|
|
|
mean value: 0.0113800048828125
|
|
|
|
key: test_mcc
|
|
value: [0.89293523 0.7725393 0.82916172 0.50666667 0.87666667 0.63500064
|
|
0.93883452 0.52960948 0.82689823 0.52814014]
|
|
|
|
mean value: 0.7336452598997402
|
|
|
|
key: train_mcc
|
|
value: [0.95993508 0.92646248 0.96019601 0.9005547 0.96678929 0.83126512
|
|
0.96675933 0.88314972 0.90066602 0.45678449]
|
|
|
|
mean value: 0.8752562259274668
|
|
|
|
key: test_accuracy
|
|
value: [0.94736842 0.89473684 0.92105263 0.78378378 0.94594595 0.83783784
|
|
0.97297297 0.75675676 0.91891892 0.72972973]
|
|
|
|
mean value: 0.8709103840682788
|
|
|
|
key: train_accuracy
|
|
value: [0.98208955 0.96716418 0.98208955 0.95535714 0.98511905 0.92261905
|
|
0.98511905 0.94345238 0.95535714 0.62797619]
|
|
|
|
mean value: 0.930634328358209
|
|
|
|
key: test_fscore
|
|
value: [0.95833333 0.92592593 0.93877551 0.84 0.96 0.89285714
|
|
0.98039216 0.79069767 0.94117647 0.75 ]
|
|
|
|
mean value: 0.8978158214190068
|
|
|
|
key: train_fscore
|
|
value: [0.98660714 0.97560976 0.98642534 0.96703297 0.98881432 0.94468085
|
|
0.98876404 0.95571096 0.96732026 0.6105919 ]
|
|
|
|
mean value: 0.9371557536495605
|
|
|
|
key: test_precision
|
|
value: [1. 0.86206897 0.95833333 0.84 0.96 0.80645161
|
|
0.96153846 0.89473684 0.88888889 0.9375 ]
|
|
|
|
mean value: 0.9109518104286414
|
|
|
|
key: train_precision
|
|
value: [0.97787611 0.96069869 0.99090909 0.94420601 0.98222222 0.89516129
|
|
0.98654709 0.99514563 0.94067797 1. ]
|
|
|
|
mean value: 0.9673444090560057
|
|
|
|
key: test_recall
|
|
value: [0.92 1. 0.92 0.84 0.96 1.
|
|
1. 0.70833333 1. 0.625 ]
|
|
|
|
mean value: 0.8973333333333333
|
|
|
|
key: train_recall
|
|
value: [0.9954955 0.99099099 0.98198198 0.99099099 0.9954955 1.
|
|
0.99099099 0.91928251 0.9955157 0.43946188]
|
|
|
|
mean value: 0.9300206035632045
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 0.84615385 0.92153846 0.75333333 0.93833333 0.75
|
|
0.95833333 0.77724359 0.88461538 0.77403846]
|
|
|
|
mean value: 0.8563589743589743
|
|
|
|
key: train_roc_auc
|
|
value: [0.97562385 0.95567249 0.98214143 0.93847795 0.98020389 0.88596491
|
|
0.9823376 0.95521648 0.93581094 0.71973094]
|
|
|
|
mean value: 0.931118049025819
|
|
|
|
key: test_jcc
|
|
value: [0.92 0.86206897 0.88461538 0.72413793 0.92307692 0.80645161
|
|
0.96153846 0.65384615 0.88888889 0.6 ]
|
|
|
|
mean value: 0.8224624321420761
|
|
|
|
key: train_jcc
|
|
value: [0.97356828 0.95238095 0.97321429 0.93617021 0.97787611 0.89516129
|
|
0.97777778 0.91517857 0.93670886 0.43946188]
|
|
|
|
mean value: 0.8977498222690707
|
|
|
|
MCC on Blind test: 0.76
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01817489 0.01602721 0.0174787 0.0170126 0.0176394 0.01785445
|
|
0.0176723 0.01650739 0.01890135 0.01761723]
|
|
|
|
mean value: 0.017488551139831544
|
|
|
|
key: score_time
|
|
value: [0.01187253 0.0119226 0.01182389 0.01184249 0.01186943 0.01180172
|
|
0.0117867 0.0118196 0.01181936 0.01181602]
|
|
|
|
mean value: 0.011837434768676759
|
|
|
|
key: test_mcc
|
|
value: [0.88307692 0.7725393 0.82277732 0.71180522 0.81831709 0.75333333
|
|
0.72145336 0.76282051 0.82041917 0.76282051]
|
|
|
|
mean value: 0.7829362739557089
|
|
|
|
key: train_mcc
|
|
value: [0.78507685 0.88089738 0.94106585 0.90753918 0.79928718 0.86510715
|
|
0.93518035 0.95345422 0.93352229 0.91658801]
|
|
|
|
mean value: 0.8917718459489616
|
|
|
|
key: test_accuracy
|
|
value: [0.94736842 0.89473684 0.92105263 0.83783784 0.91891892 0.89189189
|
|
0.86486486 0.89189189 0.91891892 0.89189189]
|
|
|
|
mean value: 0.8979374110953058
|
|
|
|
key: train_accuracy
|
|
value: [0.90149254 0.94626866 0.97313433 0.95833333 0.9077381 0.93452381
|
|
0.9702381 0.97916667 0.9702381 0.96130952]
|
|
|
|
mean value: 0.9502443141435679
|
|
|
|
key: test_fscore
|
|
value: [0.96 0.92592593 0.94117647 0.86363636 0.94339623 0.92
|
|
0.89361702 0.91666667 0.93877551 0.91666667]
|
|
|
|
mean value: 0.921986085137963
|
|
|
|
key: train_fscore
|
|
value: [0.93081761 0.96086957 0.97949886 0.96832579 0.93473684 0.94835681
|
|
0.97716895 0.98426966 0.97797357 0.97025172]
|
|
|
|
mean value: 0.963226937502244
|
|
|
|
key: test_precision
|
|
value: [0.96 0.86206897 0.92307692 1. 0.89285714 0.92
|
|
0.95454545 0.91666667 0.92 0.91666667]
|
|
|
|
mean value: 0.9265881819330095
|
|
|
|
key: train_precision
|
|
value: [0.87058824 0.92857143 0.99078341 0.97272727 0.87747036 0.99019608
|
|
0.99074074 0.98648649 0.96103896 0.99065421]
|
|
|
|
mean value: 0.955925717476733
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 0.96 0.76 1. 0.92
|
|
0.84 0.91666667 0.95833333 0.91666667]
|
|
|
|
mean value: 0.9231666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 0.9954955 0.96846847 0.96396396 1. 0.90990991
|
|
0.96396396 0.98206278 0.9955157 0.95067265]
|
|
|
|
mean value: 0.9730052922878035
|
|
|
|
key: test_roc_auc
|
|
value: [0.94153846 0.84615385 0.90307692 0.88 0.875 0.87666667
|
|
0.87833333 0.88141026 0.90224359 0.88141026]
|
|
|
|
mean value: 0.8865833333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.8539823 0.92252651 0.97538468 0.95566619 0.86403509 0.94618303
|
|
0.97321005 0.97775705 0.95793484 0.96648677]
|
|
|
|
mean value: 0.939316650180542
|
|
|
|
key: test_jcc
|
|
value: [0.92307692 0.86206897 0.88888889 0.76 0.89285714 0.85185185
|
|
0.80769231 0.84615385 0.88461538 0.84615385]
|
|
|
|
mean value: 0.8563359156807433
|
|
|
|
key: train_jcc
|
|
value: [0.87058824 0.92468619 0.95982143 0.93859649 0.87747036 0.90178571
|
|
0.95535714 0.96902655 0.95689655 0.94222222]
|
|
|
|
mean value: 0.9296450883055245
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.16002059 0.14514327 0.14576077 0.14494777 0.1454823 0.14638209
|
|
0.1459353 0.14922428 0.14597917 0.14762712]
|
|
|
|
mean value: 0.14765026569366455
|
|
|
|
key: score_time
|
|
value: [0.01491308 0.01496863 0.01616168 0.01505423 0.01491213 0.01498389
|
|
0.01488948 0.01515555 0.01542425 0.01537681]
|
|
|
|
mean value: 0.015183973312377929
|
|
|
|
key: test_mcc
|
|
value: [0.94415495 0.94211144 0.88307692 0.93883452 0.8125186 0.87841046
|
|
0.93883452 0.88141026 0.94135745 0.82660125]
|
|
|
|
mean value: 0.898731037474338
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97368421 0.97368421 0.94736842 0.97297297 0.91891892 0.94594595
|
|
0.97297297 0.94594595 0.97297297 0.91891892]
|
|
|
|
mean value: 0.9543385490753912
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 0.98039216 0.96 0.98039216 0.94117647 0.96153846
|
|
0.98039216 0.95833333 0.97959184 0.93617021]
|
|
|
|
mean value: 0.965757862228361
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.96153846 0.96 0.96153846 0.92307692 0.92592593
|
|
0.96153846 0.95833333 0.96 0.95652174]
|
|
|
|
mean value: 0.9568473306082002
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 0.96 1. 0.96 1.
|
|
1. 0.95833333 1. 0.91666667]
|
|
|
|
mean value: 0.9755
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.96153846 0.94153846 0.95833333 0.89666667 0.91666667
|
|
0.95833333 0.94070513 0.96153846 0.91987179]
|
|
|
|
mean value: 0.9435192307692307
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96 0.96153846 0.92307692 0.96153846 0.88888889 0.92592593
|
|
0.96153846 0.92 0.96 0.88 ]
|
|
|
|
mean value: 0.9342507122507122
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05391145 0.0531919 0.05098367 0.08077931 0.07559562 0.05956149
|
|
0.07425714 0.05574298 0.0775423 0.05907965]
|
|
|
|
mean value: 0.06406455039978028
|
|
|
|
key: score_time
|
|
value: [0.02093887 0.02257323 0.02400708 0.03417873 0.03756571 0.03825617
|
|
0.03833842 0.03335881 0.04276466 0.01980209]
|
|
|
|
mean value: 0.031178379058837892
|
|
|
|
key: test_mcc
|
|
value: [0.94415495 0.94211144 1. 0.93883452 0.69948088 0.87841046
|
|
0.87666667 0.94135745 0.94135745 0.82660125]
|
|
|
|
mean value: 0.8988975069351781
|
|
|
|
key: train_mcc
|
|
value: [0.98667641 0.99335949 0.99333067 0.99336957 0.98672357 0.99336957
|
|
0.97344713 0.99336957 0.99334066 0.99334066]
|
|
|
|
mean value: 0.9900327295636625
|
|
|
|
key: test_accuracy
|
|
value: [0.97368421 0.97368421 1. 0.97297297 0.86486486 0.94594595
|
|
0.94594595 0.97297297 0.97297297 0.91891892]
|
|
|
|
mean value: 0.9541963015647227
|
|
|
|
key: train_accuracy
|
|
value: [0.99402985 0.99701493 0.99701493 0.99702381 0.99404762 0.99702381
|
|
0.98809524 0.99702381 0.99702381 0.99702381]
|
|
|
|
mean value: 0.9955321606254443
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 0.98039216 1. 0.98039216 0.89795918 0.96153846
|
|
0.96 0.97959184 0.97959184 0.93617021]
|
|
|
|
mean value: 0.965522768190746
|
|
|
|
key: train_fscore
|
|
value: [0.9955157 0.99774266 0.99775281 0.99775281 0.9954955 0.99775281
|
|
0.99099099 0.99775281 0.99776286 0.99776286]
|
|
|
|
mean value: 0.9966281808235044
|
|
|
|
key: test_precision
|
|
value: [1. 0.96153846 1. 0.96153846 0.91666667 0.92592593
|
|
0.96 0.96 0.96 0.95652174]
|
|
|
|
mean value: 0.960219125479995
|
|
|
|
key: train_precision
|
|
value: [0.99107143 1. 0.9955157 0.9955157 0.9954955 0.9955157
|
|
0.99099099 1. 0.99553571 0.99553571]
|
|
|
|
mean value: 0.9955176428831137
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 1. 1. 0.88 1.
|
|
0.96 1. 1. 0.91666667]
|
|
|
|
mean value: 0.9716666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 0.9954955 1. 1. 0.9954955 1.
|
|
0.99099099 0.9955157 1. 1. ]
|
|
|
|
mean value: 0.9977497677049246
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.96153846 1. 0.95833333 0.85666667 0.91666667
|
|
0.93833333 0.96153846 0.96153846 0.91987179]
|
|
|
|
mean value: 0.945448717948718
|
|
|
|
key: train_roc_auc
|
|
value: [0.99115044 0.99774775 0.99557522 0.99561404 0.99336178 0.99561404
|
|
0.98672357 0.99775785 0.99557522 0.99557522]
|
|
|
|
mean value: 0.994469512015791
|
|
|
|
key: test_jcc
|
|
value: [0.96 0.96153846 1. 0.96153846 0.81481481 0.92592593
|
|
0.92307692 0.96 0.96 0.88 ]
|
|
|
|
mean value: 0.9346894586894587
|
|
|
|
key: train_jcc
|
|
value: [0.99107143 0.9954955 0.9955157 0.9955157 0.99103139 0.9955157
|
|
0.98214286 0.9955157 0.99553571 0.99553571]
|
|
|
|
mean value: 0.9932875380184797
|
|
|
|
MCC on Blind test: 0.88
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09932303 0.11552787 0.10941839 0.11004257 0.0523293 0.09461427
|
|
0.10743093 0.11749291 0.09413934 0.09523654]
|
|
|
|
mean value: 0.09955551624298095
|
|
|
|
key: score_time
|
|
value: [0.0217464 0.02210569 0.02551961 0.02250648 0.01413608 0.02570868
|
|
0.02161288 0.02219081 0.02209234 0.02191257]
|
|
|
|
mean value: 0.02195315361022949
|
|
|
|
key: test_mcc
|
|
value: [0.45083561 0.25064609 0.44835883 0.54980516 0.47836715 0.68620269
|
|
0.61784991 0.53708616 0.38832708 0.57611663]
|
|
|
|
mean value: 0.49835952828494806
|
|
|
|
key: train_mcc
|
|
value: [0.94703929 0.98003672 0.96020006 0.94735571 0.95388981 0.95388981
|
|
0.95388981 0.9668468 0.97344973 0.95367901]
|
|
|
|
mean value: 0.9590276756041958
|
|
|
|
key: test_accuracy
|
|
value: [0.76315789 0.68421053 0.76315789 0.81081081 0.78378378 0.86486486
|
|
0.83783784 0.78378378 0.72972973 0.81081081]
|
|
|
|
mean value: 0.7832147937411096
|
|
|
|
key: train_accuracy
|
|
value: [0.9761194 0.99104478 0.98208955 0.97619048 0.97916667 0.97916667
|
|
0.97916667 0.98511905 0.98809524 0.97916667]
|
|
|
|
mean value: 0.9815325159914712
|
|
|
|
key: test_fscore
|
|
value: [0.83018868 0.77777778 0.84210526 0.87272727 0.85714286 0.90196078
|
|
0.88888889 0.85714286 0.8 0.86792453]
|
|
|
|
mean value: 0.8495858908698444
|
|
|
|
key: train_fscore
|
|
value: [0.98230088 0.99328859 0.98666667 0.98230088 0.98447894 0.98447894
|
|
0.98447894 0.98891353 0.99111111 0.98454746]
|
|
|
|
mean value: 0.9862565932256198
|
|
|
|
key: test_precision
|
|
value: [0.78571429 0.72413793 0.75 0.8 0.77419355 0.88461538
|
|
0.82758621 0.75 0.76923077 0.79310345]
|
|
|
|
mean value: 0.7858581574154433
|
|
|
|
key: train_precision
|
|
value: [0.96521739 0.98666667 0.97368421 0.96521739 0.96943231 0.96943231
|
|
0.96943231 0.97807018 0.98237885 0.96956522]
|
|
|
|
mean value: 0.9729096850488571
|
|
|
|
key: test_recall
|
|
value: [0.88 0.84 0.96 0.96 0.96 0.92
|
|
0.96 1. 0.83333333 0.95833333]
|
|
|
|
mean value: 0.9271666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.70923077 0.61230769 0.67230769 0.73 0.68833333 0.835
|
|
0.77166667 0.69230769 0.68589744 0.74839744]
|
|
|
|
mean value: 0.7145448717948717
|
|
|
|
key: train_roc_auc
|
|
value: [0.96460177 0.98672566 0.97345133 0.96491228 0.96929825 0.96929825
|
|
0.96929825 0.97787611 0.98230088 0.96902655]
|
|
|
|
mean value: 0.9726789318428816
|
|
|
|
key: test_jcc
|
|
value: [0.70967742 0.63636364 0.72727273 0.77419355 0.75 0.82142857
|
|
0.8 0.75 0.66666667 0.76666667]
|
|
|
|
mean value: 0.7402269236140204
|
|
|
|
key: train_jcc
|
|
value: [0.96521739 0.98666667 0.97368421 0.96521739 0.96943231 0.96943231
|
|
0.96943231 0.97807018 0.98237885 0.96956522]
|
|
|
|
mean value: 0.9729096850488571
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.53125405 0.52659512 0.51846385 0.5213213 0.51650882 0.51792812
|
|
0.52455926 0.51938128 0.5282321 0.52801538]
|
|
|
|
mean value: 0.5232259273529053
|
|
|
|
key: score_time
|
|
value: [0.0104146 0.00927186 0.00926518 0.00936866 0.00938082 0.00925636
|
|
0.00925016 0.00930786 0.00949931 0.00930738]
|
|
|
|
mean value: 0.009432220458984375
|
|
|
|
key: test_mcc
|
|
value: [0.84557673 0.88514167 1. 0.93883452 0.7689699 0.87841046
|
|
0.87666667 0.94135745 0.88141026 0.82660125]
|
|
|
|
mean value: 0.8842968900359525
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92105263 0.94736842 1. 0.97297297 0.89189189 0.94594595
|
|
0.94594595 0.97297297 0.94594595 0.91891892]
|
|
|
|
mean value: 0.9463015647226174
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93617021 0.96153846 1. 0.98039216 0.91666667 0.96153846
|
|
0.96 0.97959184 0.95833333 0.93617021]
|
|
|
|
mean value: 0.9590401342206276
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.92592593 1. 0.96153846 0.95652174 0.92592593
|
|
0.96 0.96 0.95833333 0.95652174]
|
|
|
|
mean value: 0.9604767124984517
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.88 1. 1. 1. 0.88 1.
|
|
0.96 1. 0.95833333 0.91666667]
|
|
|
|
mean value: 0.9595
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.94 0.92307692 1. 0.95833333 0.89833333 0.91666667
|
|
0.93833333 0.96153846 0.94070513 0.91987179]
|
|
|
|
mean value: 0.9396858974358975
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.88 0.92592593 1. 0.96153846 0.84615385 0.92592593
|
|
0.92307692 0.96 0.92 0.88 ]
|
|
|
|
mean value: 0.9222621082621083
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.88
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02706528 0.03005981 0.02612472 0.03254724 0.0260458 0.03336263
|
|
0.0261786 0.02648377 0.03136325 0.04712701]
|
|
|
|
mean value: 0.030635809898376463
|
|
|
|
key: score_time
|
|
value: [0.01234508 0.01247501 0.01268005 0.01250315 0.01379752 0.0125246
|
|
0.01405263 0.0139327 0.01252151 0.02474666]
|
|
|
|
mean value: 0.014157891273498535
|
|
|
|
key: test_mcc
|
|
value: [ 0.21159842 0.07844645 0.21159842 0.05685735 -0.03984095 -0.10498016
|
|
-0.05527708 0.29072859 0.29072859 0.20588303]
|
|
|
|
mean value: 0.11457426794061795
|
|
|
|
key: train_mcc
|
|
value: [0.36247836 0.37160063 0.38937018 0.35107145 0.36031176 0.37828212
|
|
0.36031176 0.34392233 0.34392233 0.36271672]
|
|
|
|
mean value: 0.3623987647752257
|
|
|
|
key: test_accuracy
|
|
value: [0.68421053 0.65789474 0.68421053 0.62162162 0.59459459 0.59459459
|
|
0.62162162 0.7027027 0.7027027 0.67567568]
|
|
|
|
mean value: 0.6539829302987198
|
|
|
|
key: train_accuracy
|
|
value: [0.72537313 0.72835821 0.73432836 0.7202381 0.72321429 0.72916667
|
|
0.72321429 0.7202381 0.7202381 0.72619048]
|
|
|
|
mean value: 0.7250559701492537
|
|
|
|
key: test_fscore
|
|
value: [0.79310345 0.78688525 0.79310345 0.74074074 0.72727273 0.73684211
|
|
0.75862069 0.80701754 0.80701754 0.78571429]
|
|
|
|
mean value: 0.7736317778818746
|
|
|
|
key: train_fscore
|
|
value: [0.82835821 0.82990654 0.83302064 0.82527881 0.82681564 0.82990654
|
|
0.82681564 0.82592593 0.82592593 0.82899628]
|
|
|
|
mean value: 0.8280950160670916
|
|
|
|
key: test_precision
|
|
value: [0.6969697 0.66666667 0.6969697 0.68965517 0.66666667 0.65625
|
|
0.66666667 0.6969697 0.6969697 0.6875 ]
|
|
|
|
mean value: 0.6821283960292581
|
|
|
|
key: train_precision
|
|
value: [0.70700637 0.70926518 0.71382637 0.70253165 0.7047619 0.70926518
|
|
0.7047619 0.70347003 0.70347003 0.70793651]
|
|
|
|
mean value: 0.7066295113545357
|
|
|
|
key: test_recall
|
|
value: [0.92 0.96 0.92 0.8 0.8 0.84
|
|
0.88 0.95833333 0.95833333 0.91666667]
|
|
|
|
mean value: 0.8953333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.57538462 0.51846154 0.57538462 0.525 0.48333333 0.46166667
|
|
0.48166667 0.59455128 0.59455128 0.57371795]
|
|
|
|
mean value: 0.5383717948717949
|
|
|
|
key: train_roc_auc
|
|
value: [0.59292035 0.59734513 0.60619469 0.5877193 0.59210526 0.60087719
|
|
0.59210526 0.5840708 0.5840708 0.59292035]
|
|
|
|
mean value: 0.5930329141437665
|
|
|
|
key: test_jcc
|
|
value: [0.65714286 0.64864865 0.65714286 0.58823529 0.57142857 0.58333333
|
|
0.61111111 0.67647059 0.67647059 0.64705882]
|
|
|
|
mean value: 0.6317042672925026
|
|
|
|
key: train_jcc
|
|
value: [0.70700637 0.70926518 0.71382637 0.70253165 0.7047619 0.70926518
|
|
0.7047619 0.70347003 0.70347003 0.70793651]
|
|
|
|
mean value: 0.7066295113545357
|
|
|
|
MCC on Blind test: 0.02
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0351007 0.03754663 0.03908205 0.03760624 0.03315449 0.03761458
|
|
0.03743911 0.03727436 0.03750277 0.03959727]
|
|
|
|
mean value: 0.03719182014465332
|
|
|
|
key: score_time
|
|
value: [0.02327967 0.02323771 0.02215171 0.02249551 0.02028251 0.02091217
|
|
0.02243042 0.02017927 0.02151871 0.02093935]
|
|
|
|
mean value: 0.021742701530456543
|
|
|
|
key: test_mcc
|
|
value: [0.94415495 0.82874193 0.94211144 0.63 0.93883452 0.87841046
|
|
0.87841046 0.82689823 0.82041917 0.94135745]
|
|
|
|
mean value: 0.8629338607810424
|
|
|
|
key: train_mcc
|
|
value: [0.94665847 0.93978223 0.93978223 0.94039705 0.94697662 0.94697662
|
|
0.94677671 0.95335815 0.94673482 0.95335815]
|
|
|
|
mean value: 0.9460801053122094
|
|
|
|
key: test_accuracy
|
|
value: [0.97368421 0.92105263 0.97368421 0.83783784 0.97297297 0.94594595
|
|
0.94594595 0.91891892 0.91891892 0.97297297]
|
|
|
|
mean value: 0.9381934566145093
|
|
|
|
key: train_accuracy
|
|
value: [0.9761194 0.97313433 0.97313433 0.97321429 0.97619048 0.97619048
|
|
0.97619048 0.97916667 0.97619048 0.97916667]
|
|
|
|
mean value: 0.9758697583511016
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 0.94339623 0.98039216 0.88 0.98039216 0.96153846
|
|
0.96153846 0.94117647 0.93877551 0.97959184]
|
|
|
|
mean value: 0.9546393117479213
|
|
|
|
key: train_fscore
|
|
value: [0.98222222 0.97995546 0.97995546 0.98004435 0.98222222 0.98222222
|
|
0.98214286 0.98447894 0.98230088 0.98447894]
|
|
|
|
mean value: 0.9820023539200488
|
|
|
|
key: test_precision
|
|
value: [1. 0.89285714 0.96153846 0.88 0.96153846 0.92592593
|
|
0.92592593 0.88888889 0.92 0.96 ]
|
|
|
|
mean value: 0.9316674806674807
|
|
|
|
key: train_precision
|
|
value: [0.96929825 0.969163 0.969163 0.9650655 0.96929825 0.96929825
|
|
0.97345133 0.97368421 0.96943231 0.97368421]
|
|
|
|
mean value: 0.9701538293111679
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 1. 0.88 1. 1.
|
|
1. 1. 0.95833333 1. ]
|
|
|
|
mean value: 0.9798333333333333
|
|
|
|
key: train_recall
|
|
value: [0.9954955 0.99099099 0.99099099 0.9954955 0.9954955 0.9954955
|
|
0.99099099 0.9955157 0.9955157 0.9955157 ]
|
|
|
|
mean value: 0.9941502040156749
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.88461538 0.96153846 0.815 0.95833333 0.91666667
|
|
0.91666667 0.88461538 0.90224359 0.96153846]
|
|
|
|
mean value: 0.9181217948717949
|
|
|
|
key: train_roc_auc
|
|
value: [0.9667743 0.96452204 0.96452204 0.96266003 0.96704599 0.96704599
|
|
0.96917971 0.97120917 0.9667844 0.97120917]
|
|
|
|
mean value: 0.9670952852092037
|
|
|
|
key: test_jcc
|
|
value: [0.96 0.89285714 0.96153846 0.78571429 0.96153846 0.92592593
|
|
0.92592593 0.88888889 0.88461538 0.96 ]
|
|
|
|
mean value: 0.9147004477004477
|
|
|
|
key: train_jcc
|
|
value: [0.9650655 0.96069869 0.96069869 0.96086957 0.9650655 0.9650655
|
|
0.96491228 0.96943231 0.96521739 0.96943231]
|
|
|
|
mean value: 0.9646457752507337
|
|
|
|
MCC on Blind test: 0.86
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.25060892 0.25483131 0.27242565 0.25408888 0.44746661 0.27057695
|
|
0.25057507 0.26308417 0.25613451 0.25254703]
|
|
|
|
mean value: 0.2772339105606079
|
|
|
|
key: score_time
|
|
value: [0.02252483 0.02244163 0.02069497 0.0212121 0.02924228 0.02368617
|
|
0.02381682 0.0236721 0.02256131 0.02059531]
|
|
|
|
mean value: 0.023044753074645995
|
|
|
|
key: test_mcc
|
|
value: [0.94415495 0.82874193 0.94415495 0.69948088 0.93883452 0.87841046
|
|
0.87841046 0.82689823 0.82041917 0.94135745]
|
|
|
|
mean value: 0.8700863001234621
|
|
|
|
key: train_mcc
|
|
value: [0.94665847 0.95314792 0.94645906 0.95356818 0.96678929 0.94697662
|
|
0.94677671 0.95335815 0.94673482 0.95335815]
|
|
|
|
mean value: 0.951382738200648
|
|
|
|
key: test_accuracy
|
|
value: [0.97368421 0.92105263 0.97368421 0.86486486 0.97297297 0.94594595
|
|
0.94594595 0.91891892 0.91891892 0.97297297]
|
|
|
|
mean value: 0.9408961593172119
|
|
|
|
key: train_accuracy
|
|
value: [0.9761194 0.97910448 0.9761194 0.97916667 0.98511905 0.97619048
|
|
0.97619048 0.97916667 0.97619048 0.97916667]
|
|
|
|
mean value: 0.9782533759772566
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 0.94339623 0.97959184 0.89795918 0.98039216 0.96153846
|
|
0.96153846 0.94117647 0.93877551 0.97959184]
|
|
|
|
mean value: 0.956355198102463
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_8020.py:107: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_8020.py:110: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.98222222 0.98434004 0.98214286 0.9844098 0.98881432 0.98222222
|
|
0.98214286 0.98447894 0.98230088 0.98447894]
|
|
|
|
mean value: 0.983755307705348
|
|
|
|
key: test_precision
|
|
value: [1. 0.89285714 1. 0.91666667 0.96153846 0.92592593
|
|
0.92592593 0.88888889 0.92 0.96 ]
|
|
|
|
mean value: 0.9391803011803012
|
|
|
|
key: train_precision
|
|
value: [0.96929825 0.97777778 0.97345133 0.97356828 0.98222222 0.96929825
|
|
0.97345133 0.97368421 0.96943231 0.97368421]
|
|
|
|
mean value: 0.9735868163496765
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 0.96 0.88 1. 1.
|
|
1. 1. 0.95833333 1. ]
|
|
|
|
mean value: 0.9758333333333333
|
|
|
|
key: train_recall
|
|
value: [0.9954955 0.99099099 0.99099099 0.9954955 0.9954955 0.9954955
|
|
0.99099099 0.9955157 0.9955157 0.9955157 ]
|
|
|
|
mean value: 0.9941502040156749
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.88461538 0.98 0.85666667 0.95833333 0.91666667
|
|
0.91666667 0.88461538 0.90224359 0.96153846]
|
|
|
|
mean value: 0.9241346153846154
|
|
|
|
key: train_roc_auc
|
|
value: [0.9667743 0.9733716 0.96894682 0.97143196 0.98020389 0.96704599
|
|
0.96917971 0.97120917 0.9667844 0.97120917]
|
|
|
|
mean value: 0.9706157012936627
|
|
|
|
key: test_jcc
|
|
value: [0.96 0.89285714 0.96 0.81481481 0.96153846 0.92592593
|
|
0.92592593 0.88888889 0.88461538 0.96 ]
|
|
|
|
mean value: 0.9174566544566545
|
|
|
|
key: train_jcc
|
|
value: [0.9650655 0.969163 0.96491228 0.96929825 0.97787611 0.9650655
|
|
0.96491228 0.96943231 0.96521739 0.96943231]
|
|
|
|
mean value: 0.9680374933299069
|
|
|
|
MCC on Blind test: 0.86
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0363636 0.03712535 0.05885887 0.05647492 0.04507446 0.03613973
|
|
0.03625989 0.03593516 0.03642297 0.036026 ]
|
|
|
|
mean value: 0.04146809577941894
|
|
|
|
key: score_time
|
|
value: [0.01377487 0.0133152 0.02077579 0.01361918 0.01332784 0.01323223
|
|
0.01328897 0.0136342 0.01316261 0.01340938]
|
|
|
|
mean value: 0.01415402889251709
|
|
|
|
key: test_mcc
|
|
value: [0.92295821 0.92295821 0.80064077 0.6821865 0.80138769 0.80138769
|
|
0.95993456 0.75793094 0.75712849 0.92153718]
|
|
|
|
mean value: 0.8328050218431607
|
|
|
|
key: train_mcc
|
|
value: [0.87910109 0.86958995 0.91464643 0.90104717 0.8834738 0.88372624
|
|
0.87887912 0.89245234 0.89684997 0.89684997]
|
|
|
|
mean value: 0.8896616098177123
|
|
|
|
key: test_accuracy
|
|
value: [0.96 0.96 0.9 0.84 0.89795918 0.89795918
|
|
0.97959184 0.87755102 0.87755102 0.95918367]
|
|
|
|
mean value: 0.9149795918367347
|
|
|
|
key: train_accuracy
|
|
value: [0.93918919 0.93468468 0.95720721 0.95045045 0.94157303 0.94157303
|
|
0.93932584 0.94606742 0.94831461 0.94831461]
|
|
|
|
mean value: 0.9446700070857375
|
|
|
|
key: test_fscore
|
|
value: [0.95833333 0.96153846 0.90196078 0.83333333 0.90566038 0.90566038
|
|
0.98039216 0.88 0.86956522 0.96 ]
|
|
|
|
mean value: 0.9156444041489884
|
|
|
|
key: train_fscore
|
|
value: [0.94039735 0.93541203 0.95768374 0.95089286 0.94222222 0.94247788
|
|
0.93986637 0.94690265 0.94900222 0.94900222]
|
|
|
|
mean value: 0.945385953400634
|
|
|
|
key: test_precision
|
|
value: [1. 0.92592593 0.88461538 0.86956522 0.85714286 0.85714286
|
|
0.96153846 0.84615385 0.90909091 0.92307692]
|
|
|
|
mean value: 0.9034252382078469
|
|
|
|
key: train_precision
|
|
value: [0.92207792 0.92511013 0.94713656 0.94247788 0.92982456 0.92608696
|
|
0.92951542 0.93449782 0.93859649 0.93859649]
|
|
|
|
mean value: 0.9333920229696836
|
|
|
|
key: test_recall
|
|
value: [0.92 1. 0.92 0.8 0.96 0.96
|
|
1. 0.91666667 0.83333333 1. ]
|
|
|
|
mean value: 0.931
|
|
|
|
key: train_recall
|
|
value: [0.95945946 0.94594595 0.96846847 0.95945946 0.95495495 0.95945946
|
|
0.95045045 0.95964126 0.95964126 0.95964126]
|
|
|
|
mean value: 0.9577121965014341
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 0.96 0.9 0.84 0.89666667 0.89666667
|
|
0.97916667 0.87833333 0.87666667 0.96 ]
|
|
|
|
mean value: 0.9147500000000001
|
|
|
|
key: train_roc_auc
|
|
value: [0.93918919 0.93468468 0.95720721 0.95045045 0.94160304 0.94161314
|
|
0.93935079 0.94603684 0.9482891 0.9482891 ]
|
|
|
|
mean value: 0.9446713529673171
|
|
|
|
key: test_jcc
|
|
value: [0.92 0.92592593 0.82142857 0.71428571 0.82758621 0.82758621
|
|
0.96153846 0.78571429 0.76923077 0.92307692]
|
|
|
|
mean value: 0.8476373064993755
|
|
|
|
key: train_jcc
|
|
value: [0.8875 0.87866109 0.91880342 0.90638298 0.8907563 0.89121339
|
|
0.88655462 0.89915966 0.90295359 0.90295359]
|
|
|
|
mean value: 0.8964938635745345
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.80392122 0.93377447 0.83295512 0.78982854 0.91435003 0.75680423
|
|
0.78061891 0.88005304 0.78918362 0.80922914]
|
|
|
|
mean value: 0.829071831703186
|
|
|
|
key: score_time
|
|
value: [0.0134244 0.01726675 0.01343942 0.01431155 0.0140357 0.01349711
|
|
0.01410866 0.01354098 0.01581907 0.01338387]
|
|
|
|
mean value: 0.014282751083374023
|
|
|
|
key: test_mcc
|
|
value: [0.88070485 0.92295821 0.84270097 0.84270097 0.87833333 0.95993456
|
|
0.92153718 0.83666667 0.87813144 0.88443328]
|
|
|
|
mean value: 0.8848101451360941
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.9955056 1. 1. 1. 1.
|
|
1. 1. 1. ]
|
|
|
|
mean value: 0.9995505595230147
|
|
|
|
key: test_accuracy
|
|
value: [0.94 0.96 0.92 0.92 0.93877551 0.97959184
|
|
0.95918367 0.91836735 0.93877551 0.93877551]
|
|
|
|
mean value: 0.9413469387755102
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.99774775 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9997747747747747
|
|
|
|
key: test_fscore
|
|
value: [0.93877551 0.95833333 0.92307692 0.91666667 0.93877551 0.98039216
|
|
0.95833333 0.91666667 0.93617021 0.94117647]
|
|
|
|
mean value: 0.9408366783702025
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.99775281 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9997752808988765
|
|
|
|
key: test_precision
|
|
value: [0.95833333 1. 0.88888889 0.95652174 0.95833333 0.96153846
|
|
1. 0.91666667 0.95652174 0.88888889]
|
|
|
|
mean value: 0.9485693050910442
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.9955157 1. 1. 1. 1.
|
|
1. 1. 1. ]
|
|
|
|
mean value: 0.9995515695067264
|
|
|
|
key: test_recall
|
|
value: [0.92 0.92 0.96 0.88 0.92 1.
|
|
0.92 0.91666667 0.91666667 1. ]
|
|
|
|
mean value: 0.9353333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.94 0.96 0.92 0.92 0.93916667 0.97916667
|
|
0.96 0.91833333 0.93833333 0.94 ]
|
|
|
|
mean value: 0.9415
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.99774775 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9997747747747747
|
|
|
|
key: test_jcc
|
|
value: [0.88461538 0.92 0.85714286 0.84615385 0.88461538 0.96153846
|
|
0.92 0.84615385 0.88 0.88888889]
|
|
|
|
mean value: 0.888910866910867
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.9955157 1. 1. 1. 1.
|
|
1. 1. 1. ]
|
|
|
|
mean value: 0.9995515695067264
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01459169 0.01191235 0.01006532 0.00987005 0.00970411 0.00970721
|
|
0.00996757 0.01006603 0.00980878 0.00993657]
|
|
|
|
mean value: 0.010562968254089356
|
|
|
|
key: score_time
|
|
value: [0.01508784 0.00921392 0.00890803 0.00877428 0.008636 0.00863361
|
|
0.00878286 0.00872254 0.00874543 0.00873637]
|
|
|
|
mean value: 0.009424090385437012
|
|
|
|
key: test_mcc
|
|
value: [0.76 0.56407607 0.60192927 0.76244374 0.74389879 0.68145382
|
|
0.87833333 0.63333333 0.59166667 0.7202771 ]
|
|
|
|
mean value: 0.693741212650438
|
|
|
|
key: train_mcc
|
|
value: [0.74969739 0.70025435 0.74058019 0.76955384 0.71592872 0.72564618
|
|
0.73818018 0.71852959 0.7067683 0.6851576 ]
|
|
|
|
mean value: 0.725029634017063
|
|
|
|
key: test_accuracy
|
|
value: [0.88 0.78 0.8 0.88 0.85714286 0.83673469
|
|
0.93877551 0.81632653 0.79591837 0.85714286]
|
|
|
|
mean value: 0.844204081632653
|
|
|
|
key: train_accuracy
|
|
value: [0.87387387 0.8490991 0.86711712 0.88288288 0.85617978 0.86067416
|
|
0.86741573 0.85842697 0.85168539 0.84044944]
|
|
|
|
mean value: 0.860780443364713
|
|
|
|
key: test_fscore
|
|
value: [0.88 0.79245283 0.80769231 0.875 0.87719298 0.85185185
|
|
0.93877551 0.81632653 0.79166667 0.8627451 ]
|
|
|
|
mean value: 0.8493703777711188
|
|
|
|
key: train_fscore
|
|
value: [0.87826087 0.85466377 0.87526427 0.88841202 0.86266094 0.86752137
|
|
0.87311828 0.85382831 0.85897436 0.8492569 ]
|
|
|
|
mean value: 0.8661961088497621
|
|
|
|
key: test_precision
|
|
value: [0.88 0.75 0.77777778 0.91304348 0.78125 0.79310345
|
|
0.95833333 0.8 0.79166667 0.81481481]
|
|
|
|
mean value: 0.8259989519129324
|
|
|
|
key: train_precision
|
|
value: [0.8487395 0.82426778 0.8247012 0.84836066 0.82377049 0.82520325
|
|
0.83539095 0.88461538 0.82040816 0.80645161]
|
|
|
|
mean value: 0.8341908980303699
|
|
|
|
key: test_recall
|
|
value: [0.88 0.84 0.84 0.84 1. 0.92
|
|
0.92 0.83333333 0.79166667 0.91666667]
|
|
|
|
mean value: 0.8781666666666667
|
|
|
|
key: train_recall
|
|
value: [0.90990991 0.88738739 0.93243243 0.93243243 0.90540541 0.91441441
|
|
0.91441441 0.82511211 0.90134529 0.89686099]
|
|
|
|
mean value: 0.9019714782046621
|
|
|
|
key: test_roc_auc
|
|
value: [0.88 0.78 0.8 0.88 0.85416667 0.835
|
|
0.93916667 0.81666667 0.79583333 0.85833333]
|
|
|
|
mean value: 0.8439166666666666
|
|
|
|
key: train_roc_auc
|
|
value: [0.87387387 0.8490991 0.86711712 0.88288288 0.85629015 0.86079465
|
|
0.86752111 0.858502 0.85157355 0.84032239]
|
|
|
|
mean value: 0.8607976810891609
|
|
|
|
key: test_jcc
|
|
value: [0.78571429 0.65625 0.67741935 0.77777778 0.78125 0.74193548
|
|
0.88461538 0.68965517 0.65517241 0.75862069]
|
|
|
|
mean value: 0.7408410562679194
|
|
|
|
key: train_jcc
|
|
value: [0.78294574 0.74621212 0.77819549 0.7992278 0.75849057 0.76603774
|
|
0.77480916 0.74493927 0.75280899 0.73800738]
|
|
|
|
mean value: 0.7641674247880876
|
|
|
|
MCC on Blind test: 0.56
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01012707 0.01006651 0.01009059 0.01002502 0.01017642 0.01020885
|
|
0.01007009 0.01008034 0.01021719 0.01031804]
|
|
|
|
mean value: 0.01013801097869873
|
|
|
|
key: score_time
|
|
value: [0.0087781 0.00873303 0.00881004 0.00882101 0.00873613 0.00878119
|
|
0.0087359 0.00880241 0.0087862 0.00874043]
|
|
|
|
mean value: 0.008772444725036622
|
|
|
|
key: test_mcc
|
|
value: [0.72524067 0.72057669 0.56407607 0.68887476 0.72844463 0.755
|
|
0.69595532 0.63272208 0.80138769 0.79666667]
|
|
|
|
mean value: 0.7108944579476518
|
|
|
|
key: train_mcc
|
|
value: [0.73079815 0.74850752 0.74823373 0.73948935 0.74440499 0.74037075
|
|
0.71741402 0.74943432 0.73143269 0.74009857]
|
|
|
|
mean value: 0.7390184089399621
|
|
|
|
key: test_accuracy
|
|
value: [0.86 0.86 0.78 0.84 0.85714286 0.87755102
|
|
0.83673469 0.81632653 0.89795918 0.89795918]
|
|
|
|
mean value: 0.8523673469387755
|
|
|
|
key: train_accuracy
|
|
value: [0.86486486 0.87387387 0.87387387 0.86936937 0.87191011 0.86966292
|
|
0.85842697 0.8741573 0.86516854 0.86966292]
|
|
|
|
mean value: 0.8690970746026926
|
|
|
|
key: test_fscore
|
|
value: [0.85106383 0.8627451 0.76595745 0.82608696 0.87272727 0.88
|
|
0.81818182 0.80851064 0.88888889 0.89795918]
|
|
|
|
mean value: 0.8472121132926022
|
|
|
|
key: train_fscore
|
|
value: [0.86111111 0.87096774 0.87155963 0.86635945 0.86896552 0.86574074
|
|
0.85517241 0.87096774 0.86175115 0.86697248]
|
|
|
|
mean value: 0.8659567975927386
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.84615385 0.81818182 0.9047619 0.8 0.88
|
|
0.94736842 0.82608696 0.95238095 0.88 ]
|
|
|
|
mean value: 0.8764024808143801
|
|
|
|
key: train_precision
|
|
value: [0.88571429 0.89150943 0.88785047 0.88679245 0.88732394 0.89047619
|
|
0.87323944 0.8957346 0.88625592 0.88732394]
|
|
|
|
mean value: 0.8872220675543325
|
|
|
|
key: test_recall
|
|
value: [0.8 0.88 0.72 0.76 0.96 0.88
|
|
0.72 0.79166667 0.83333333 0.91666667]
|
|
|
|
mean value: 0.8261666666666667
|
|
|
|
key: train_recall
|
|
value: [0.83783784 0.85135135 0.85585586 0.84684685 0.85135135 0.84234234
|
|
0.83783784 0.84753363 0.83856502 0.84753363]
|
|
|
|
mean value: 0.8457055710418939
|
|
|
|
key: test_roc_auc
|
|
value: [0.86 0.86 0.78 0.84 0.855 0.8775
|
|
0.83916667 0.81583333 0.89666667 0.89833333]
|
|
|
|
mean value: 0.85225
|
|
|
|
key: train_roc_auc
|
|
value: [0.86486486 0.87387387 0.87387387 0.86936937 0.87186402 0.86960166
|
|
0.8583808 0.87421727 0.86522846 0.86971276]
|
|
|
|
mean value: 0.8690986951076637
|
|
|
|
key: test_jcc
|
|
value: [0.74074074 0.75862069 0.62068966 0.7037037 0.77419355 0.78571429
|
|
0.69230769 0.67857143 0.8 0.81481481]
|
|
|
|
mean value: 0.7369356559067348
|
|
|
|
key: train_jcc
|
|
value: [0.75609756 0.77142857 0.77235772 0.76422764 0.76829268 0.76326531
|
|
0.74698795 0.77142857 0.75708502 0.76518219]
|
|
|
|
mean value: 0.7636353217020652
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.0096426 0.01018953 0.00955534 0.00947046 0.01088929 0.00988102
|
|
0.01127505 0.01162434 0.01263952 0.01017809]
|
|
|
|
mean value: 0.010534524917602539
|
|
|
|
key: score_time
|
|
value: [0.01782751 0.02158022 0.01690531 0.01754045 0.01796961 0.01726055
|
|
0.01853037 0.01824474 0.01929545 0.01810694]
|
|
|
|
mean value: 0.018326115608215333
|
|
|
|
key: test_mcc
|
|
value: [0.56407607 0.6821865 0.52678658 0.6 0.46911585 0.5943247
|
|
0.63819901 0.51 0.57236448 0.75712849]
|
|
|
|
mean value: 0.5914181691221945
|
|
|
|
key: train_mcc
|
|
value: [0.71243486 0.71832543 0.73639651 0.727896 0.72236474 0.71718755
|
|
0.70841703 0.71811613 0.70051372 0.73182356]
|
|
|
|
mean value: 0.7193475546229622
|
|
|
|
key: test_accuracy
|
|
value: [0.78 0.84 0.76 0.8 0.73469388 0.79591837
|
|
0.81632653 0.75510204 0.7755102 0.87755102]
|
|
|
|
mean value: 0.7935102040816326
|
|
|
|
key: train_accuracy
|
|
value: [0.85585586 0.85810811 0.86711712 0.86261261 0.86067416 0.85842697
|
|
0.85393258 0.85842697 0.8494382 0.86516854]
|
|
|
|
mean value: 0.8589761109424031
|
|
|
|
key: test_fscore
|
|
value: [0.76595745 0.83333333 0.73913043 0.8 0.74509804 0.79166667
|
|
0.80851064 0.75 0.73170732 0.86956522]
|
|
|
|
mean value: 0.7834969093569154
|
|
|
|
key: train_fscore
|
|
value: [0.85253456 0.85245902 0.8618267 0.85647059 0.85648148 0.85583524
|
|
0.85057471 0.85450346 0.84454756 0.86111111]
|
|
|
|
mean value: 0.8546344438252198
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.86956522 0.80952381 0.8 0.73076923 0.82608696
|
|
0.86363636 0.75 0.88235294 0.90909091]
|
|
|
|
mean value: 0.8259207246291645
|
|
|
|
key: train_precision
|
|
value: [0.87264151 0.88780488 0.89756098 0.89655172 0.88095238 0.86976744
|
|
0.8685446 0.88095238 0.875 0.88995215]
|
|
|
|
mean value: 0.8819728045044671
|
|
|
|
key: test_recall
|
|
value: [0.72 0.8 0.68 0.8 0.76 0.76
|
|
0.76 0.75 0.625 0.83333333]
|
|
|
|
mean value: 0.7488333333333334
|
|
|
|
key: train_recall
|
|
value: [0.83333333 0.81981982 0.82882883 0.81981982 0.83333333 0.84234234
|
|
0.83333333 0.82959641 0.8161435 0.83408072]
|
|
|
|
mean value: 0.8290631438613502
|
|
|
|
key: test_roc_auc
|
|
value: [0.78 0.84 0.76 0.8 0.73416667 0.79666667
|
|
0.8175 0.755 0.7725 0.87666667]
|
|
|
|
mean value: 0.79325
|
|
|
|
key: train_roc_auc
|
|
value: [0.85585586 0.85810811 0.86711712 0.86261261 0.86061286 0.8583909
|
|
0.8538864 0.8584919 0.84951319 0.86523856]
|
|
|
|
mean value: 0.8589827495657092
|
|
|
|
key: test_jcc
|
|
value: [0.62068966 0.71428571 0.5862069 0.66666667 0.59375 0.65517241
|
|
0.67857143 0.6 0.57692308 0.76923077]
|
|
|
|
mean value: 0.6461496621194897
|
|
|
|
key: train_jcc
|
|
value: [0.74297189 0.74285714 0.75720165 0.74897119 0.74898785 0.748
|
|
0.74 0.74596774 0.73092369 0.75609756]
|
|
|
|
mean value: 0.7461978721854738
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02625489 0.02099991 0.01983714 0.01957965 0.01957178 0.01941919
|
|
0.01912832 0.01870322 0.01893306 0.01915574]
|
|
|
|
mean value: 0.02015829086303711
|
|
|
|
key: score_time
|
|
value: [0.01269841 0.01199389 0.01168227 0.01149487 0.0113945 0.01112556
|
|
0.01110053 0.0110867 0.01103163 0.01116681]
|
|
|
|
mean value: 0.011477518081665038
|
|
|
|
key: test_mcc
|
|
value: [0.80064077 0.84270097 0.68887476 0.76 0.72844463 0.7645166
|
|
0.91833333 0.63819901 0.79666667 0.92153718]
|
|
|
|
mean value: 0.7859913924676449
|
|
|
|
key: train_mcc
|
|
value: [0.84544443 0.82736006 0.86233104 0.84482299 0.85000307 0.84464068
|
|
0.82359278 0.84459408 0.8361234 0.83254975]
|
|
|
|
mean value: 0.8411462281013202
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.92 0.84 0.88 0.85714286 0.87755102
|
|
0.95918367 0.81632653 0.89795918 0.95918367]
|
|
|
|
mean value: 0.890734693877551
|
|
|
|
key: train_accuracy
|
|
value: [0.92117117 0.91216216 0.93018018 0.92117117 0.92359551 0.92134831
|
|
0.91011236 0.92134831 0.91685393 0.91460674]
|
|
|
|
mean value: 0.919254985322401
|
|
|
|
key: test_fscore
|
|
value: [0.90196078 0.92307692 0.85185185 0.88 0.87272727 0.88888889
|
|
0.96 0.82352941 0.89795918 0.96 ]
|
|
|
|
mean value: 0.8959994316296838
|
|
|
|
key: train_fscore
|
|
value: [0.92440605 0.91576674 0.93246187 0.92407809 0.92640693 0.92374728
|
|
0.9137931 0.92407809 0.92008639 0.91845494]
|
|
|
|
mean value: 0.9223279477282558
|
|
|
|
key: test_precision
|
|
value: [0.88461538 0.88888889 0.79310345 0.88 0.8 0.82758621
|
|
0.96 0.77777778 0.88 0.92307692]
|
|
|
|
mean value: 0.8615048629531388
|
|
|
|
key: train_precision
|
|
value: [0.8879668 0.87966805 0.90295359 0.89121339 0.89166667 0.89451477
|
|
0.87603306 0.89495798 0.8875 0.88065844]
|
|
|
|
mean value: 0.8887132742248678
|
|
|
|
key: test_recall
|
|
value: [0.92 0.96 0.92 0.88 0.96 0.96
|
|
0.96 0.875 0.91666667 1. ]
|
|
|
|
mean value: 0.9351666666666667
|
|
|
|
key: train_recall
|
|
value: [0.96396396 0.95495495 0.96396396 0.95945946 0.96396396 0.95495495
|
|
0.95495495 0.95515695 0.95515695 0.95964126]
|
|
|
|
mean value: 0.9586171373166888
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.92 0.84 0.88 0.855 0.87583333
|
|
0.95916667 0.8175 0.89833333 0.96 ]
|
|
|
|
mean value: 0.8905833333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.92117117 0.91216216 0.93018018 0.92117117 0.92368602 0.92142367
|
|
0.9102129 0.92127217 0.91676766 0.91450531]
|
|
|
|
mean value: 0.9192552417888741
|
|
|
|
key: test_jcc
|
|
value: [0.82142857 0.85714286 0.74193548 0.78571429 0.77419355 0.8
|
|
0.92307692 0.7 0.81481481 0.92307692]
|
|
|
|
mean value: 0.8141383407512439
|
|
|
|
key: train_jcc
|
|
value: [0.85943775 0.84462151 0.87346939 0.85887097 0.86290323 0.8582996
|
|
0.84126984 0.85887097 0.852 0.84920635]
|
|
|
|
mean value: 0.8558949599611555
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.07473278 1.77294016 1.73508358 1.80235052 1.49000359 1.80213141
|
|
2.13988519 2.21913028 1.70440102 2.30901408]
|
|
|
|
mean value: 1.8049672603607179
|
|
|
|
key: score_time
|
|
value: [0.01239324 0.01238942 0.01241302 0.01442647 0.01241326 0.01508927
|
|
0.0142796 0.0145545 0.01257992 0.0128932 ]
|
|
|
|
mean value: 0.013343191146850586
|
|
|
|
key: test_mcc
|
|
value: [0.88640526 0.92295821 0.84270097 0.76244374 0.83920658 0.88388348
|
|
0.96 0.87833333 0.69302938 0.92153718]
|
|
|
|
mean value: 0.8590498120777831
|
|
|
|
key: train_mcc
|
|
value: [0.97772549 0.9955056 1. 1. 0.99105141 0.9955157
|
|
0.9955157 0.98652661 0.9955156 0.9955156 ]
|
|
|
|
mean value: 0.9932871708980718
|
|
|
|
key: test_accuracy
|
|
value: [0.94 0.96 0.92 0.88 0.91836735 0.93877551
|
|
0.97959184 0.93877551 0.83673469 0.95918367]
|
|
|
|
mean value: 0.9271428571428572
|
|
|
|
key: train_accuracy
|
|
value: [0.98873874 0.99774775 1. 1. 0.99550562 0.99775281
|
|
0.99775281 0.99325843 0.99775281 0.99775281]
|
|
|
|
mean value: 0.9966261767385363
|
|
|
|
key: test_fscore
|
|
value: [0.93617021 0.96153846 0.92307692 0.875 0.92307692 0.94339623
|
|
0.97959184 0.93877551 0.80952381 0.96 ]
|
|
|
|
mean value: 0.9250149903335945
|
|
|
|
key: train_fscore
|
|
value: [0.98886414 0.99775281 1. 1. 0.9955157 0.99775281
|
|
0.99775281 0.99328859 0.99776286 0.99776286]
|
|
|
|
mean value: 0.996645258224591
|
|
|
|
key: test_precision
|
|
value: [1. 0.92592593 0.88888889 0.91304348 0.88888889 0.89285714
|
|
1. 0.92 0.94444444 0.92307692]
|
|
|
|
mean value: 0.9297125692343083
|
|
|
|
key: train_precision
|
|
value: [0.97797357 0.9955157 1. 1. 0.99107143 0.9955157
|
|
0.9955157 0.99107143 0.99553571 0.99553571]
|
|
|
|
mean value: 0.9937734939198017
|
|
|
|
key: test_recall
|
|
value: [0.88 1. 0.96 0.84 0.96 1.
|
|
0.96 0.95833333 0.70833333 1. ]
|
|
|
|
mean value: 0.9266666666666666
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1.
|
|
0.9955157 1. 1. ]
|
|
|
|
mean value: 0.9995515695067264
|
|
|
|
key: test_roc_auc
|
|
value: [0.94 0.96 0.92 0.88 0.9175 0.9375
|
|
0.98 0.93916667 0.83416667 0.96 ]
|
|
|
|
mean value: 0.9268333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.98873874 0.99774775 1. 1. 0.9955157 0.99775785
|
|
0.99775785 0.99325334 0.99774775 0.99774775]
|
|
|
|
mean value: 0.9966266715145639
|
|
|
|
key: test_jcc
|
|
value: [0.88 0.92592593 0.85714286 0.77777778 0.85714286 0.89285714
|
|
0.96 0.88461538 0.68 0.92307692]
|
|
|
|
mean value: 0.8638538868538869
|
|
|
|
key: train_jcc
|
|
value: [0.97797357 0.9955157 1. 1. 0.99107143 0.9955157
|
|
0.9955157 0.98666667 0.99553571 0.99553571]
|
|
|
|
mean value: 0.9933330177293256
|
|
|
|
MCC on Blind test: 0.68
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02911282 0.02037144 0.01997542 0.0215261 0.02112508 0.0209291
|
|
0.02215338 0.02235532 0.0229764 0.02401161]
|
|
|
|
mean value: 0.022453665733337402
|
|
|
|
key: score_time
|
|
value: [0.01282477 0.00952339 0.00922155 0.00892496 0.00909543 0.00876808
|
|
0.00880718 0.00887728 0.0087831 0.00890565]
|
|
|
|
mean value: 0.009373140335083009
|
|
|
|
key: test_mcc
|
|
value: [0.84270097 0.92 0.96076892 0.92 0.83973406 0.84757938
|
|
0.92153718 0.87813144 0.91833333 0.87813144]
|
|
|
|
mean value: 0.8926916724442335
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92 0.96 0.98 0.96 0.91836735 0.91836735
|
|
0.95918367 0.93877551 0.95918367 0.93877551]
|
|
|
|
mean value: 0.945265306122449
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.91666667 0.96 0.98039216 0.96 0.91666667 0.92592593
|
|
0.95833333 0.93617021 0.95833333 0.93617021]
|
|
|
|
mean value: 0.9448658508320585
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.95652174 0.96 0.96153846 0.96 0.95652174 0.86206897
|
|
1. 0.95652174 0.95833333 0.95652174]
|
|
|
|
mean value: 0.9528027716910775
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.88 0.96 1. 0.96 0.88 1.
|
|
0.92 0.91666667 0.95833333 0.91666667]
|
|
|
|
mean value: 0.9391666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92 0.96 0.98 0.96 0.91916667 0.91666667
|
|
0.96 0.93833333 0.95916667 0.93833333]
|
|
|
|
mean value: 0.9451666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.84615385 0.92307692 0.96153846 0.92307692 0.84615385 0.86206897
|
|
0.92 0.88 0.92 0.88 ]
|
|
|
|
mean value: 0.8962068965517241
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.87
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.11684823 0.11725545 0.11889172 0.11789179 0.11825585 0.11692381
|
|
0.1193254 0.11840868 0.11967349 0.11858964]
|
|
|
|
mean value: 0.11820640563964843
|
|
|
|
key: score_time
|
|
value: [0.01796508 0.01847696 0.01771641 0.01772141 0.01785374 0.01796412
|
|
0.01933575 0.01795578 0.01847363 0.01786184]
|
|
|
|
mean value: 0.018132472038269044
|
|
|
|
key: test_mcc
|
|
value: [0.88070485 0.84270097 0.64051262 0.72057669 0.67333333 0.75712849
|
|
0.87813144 0.84852814 0.83666667 0.88443328]
|
|
|
|
mean value: 0.7962716467950685
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.94 0.92 0.82 0.86 0.83673469 0.87755102
|
|
0.93877551 0.91836735 0.91836735 0.93877551]
|
|
|
|
mean value: 0.8968571428571429
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93877551 0.92307692 0.82352941 0.85714286 0.84 0.88461538
|
|
0.94117647 0.92307692 0.91666667 0.94117647]
|
|
|
|
mean value: 0.8989236617724012
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.95833333 0.88888889 0.80769231 0.875 0.84 0.85185185
|
|
0.92307692 0.85714286 0.91666667 0.88888889]
|
|
|
|
mean value: 0.8807541717541717
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.92 0.96 0.84 0.84 0.84 0.92
|
|
0.96 1. 0.91666667 1. ]
|
|
|
|
mean value: 0.9196666666666666
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.94 0.92 0.82 0.86 0.83666667 0.87666667
|
|
0.93833333 0.92 0.91833333 0.94 ]
|
|
|
|
mean value: 0.8969999999999999
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.88461538 0.85714286 0.7 0.75 0.72413793 0.79310345
|
|
0.88888889 0.85714286 0.84615385 0.88888889]
|
|
|
|
mean value: 0.8190074102143068
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01068354 0.01092267 0.01057434 0.01040769 0.01056528 0.01019478
|
|
0.01031733 0.01011038 0.0101707 0.01026511]
|
|
|
|
mean value: 0.010421180725097656
|
|
|
|
key: score_time
|
|
value: [0.0088346 0.00879955 0.00907588 0.00932741 0.00952077 0.00864267
|
|
0.00866199 0.00880027 0.00898933 0.0089488 ]
|
|
|
|
mean value: 0.008960127830505371
|
|
|
|
key: test_mcc
|
|
value: [0.64051262 0.4532471 0.44 0.6 0.43071846 0.52366061
|
|
0.34891534 0.6750504 0.59166667 0.55091896]
|
|
|
|
mean value: 0.5254690163225314
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.82 0.72 0.72 0.8 0.71428571 0.75510204
|
|
0.67346939 0.83673469 0.79591837 0.7755102 ]
|
|
|
|
mean value: 0.7611020408163265
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.81632653 0.68181818 0.72 0.8 0.70833333 0.78571429
|
|
0.66666667 0.82608696 0.79166667 0.76595745]
|
|
|
|
mean value: 0.756257006814163
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.78947368 0.72 0.8 0.73913043 0.70967742
|
|
0.69565217 0.86363636 0.79166667 0.7826087 ]
|
|
|
|
mean value: 0.7725178771549555
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 0.6 0.72 0.8 0.68 0.88
|
|
0.64 0.79166667 0.79166667 0.75 ]
|
|
|
|
mean value: 0.7453333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.82 0.72 0.72 0.8 0.715 0.7525
|
|
0.67416667 0.83583333 0.79583333 0.775 ]
|
|
|
|
mean value: 0.7608333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.68965517 0.51724138 0.5625 0.66666667 0.5483871 0.64705882
|
|
0.5 0.7037037 0.65517241 0.62068966]
|
|
|
|
mean value: 0.6111074911363631
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.74440408 1.75210881 1.73775601 1.73037267 1.74058366 1.73203635
|
|
1.7638557 1.73887849 1.74242353 1.74032068]
|
|
|
|
mean value: 1.742273998260498
|
|
|
|
key: score_time
|
|
value: [0.10545039 0.09120202 0.09701252 0.09037066 0.09080529 0.09075236
|
|
0.09023309 0.09100056 0.09106612 0.09486413]
|
|
|
|
mean value: 0.09327571392059326
|
|
|
|
key: test_mcc
|
|
value: [0.96076892 0.96076892 0.92295821 0.84 0.95993456 0.88388348
|
|
1. 0.92153718 0.96 0.87833333]
|
|
|
|
mean value: 0.9288184599549806
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98 0.98 0.96 0.92 0.97959184 0.93877551
|
|
1. 0.95918367 0.97959184 0.93877551]
|
|
|
|
mean value: 0.9635918367346938
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 0.98039216 0.96153846 0.92 0.98039216 0.94339623
|
|
1. 0.96 0.97959184 0.93877551]
|
|
|
|
mean value: 0.9643678185352516
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.96153846 0.92592593 0.92 0.96153846 0.89285714
|
|
1. 0.92307692 0.96 0.92 ]
|
|
|
|
mean value: 0.9464936914936916
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 1. 0.92 1. 1.
|
|
1. 1. 1. 0.95833333]
|
|
|
|
mean value: 0.9838333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.98 0.96 0.92 0.97916667 0.9375
|
|
1. 0.96 0.98 0.93916667]
|
|
|
|
mean value: 0.9635833333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96 0.96153846 0.92592593 0.85185185 0.96153846 0.89285714
|
|
1. 0.92307692 0.96 0.88461538]
|
|
|
|
mean value: 0.9321404151404151
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.95252872 1.00840211 0.97950029 0.94470716 0.9975152 0.95645308
|
|
0.96747565 0.98190379 0.99789357 1.00961471]
|
|
|
|
mean value: 0.9795994281768798
|
|
|
|
key: score_time
|
|
value: [0.29509425 0.20915961 0.26488709 0.24643779 0.20214653 0.20617676
|
|
0.18060374 0.25345802 0.23302841 0.28940439]
|
|
|
|
mean value: 0.23803966045379638
|
|
|
|
key: test_mcc
|
|
value: [0.96076892 0.92295821 0.88070485 0.84 0.92128466 0.84757938
|
|
1. 0.88443328 0.87833333 0.87833333]
|
|
|
|
mean value: 0.9014395963357796
|
|
|
|
key: train_mcc
|
|
value: [0.96024889 0.95993678 0.96431623 0.97313095 0.96439761 0.97338915
|
|
0.96878538 0.96439334 0.96002279 0.97338596]
|
|
|
|
mean value: 0.9662007063543757
|
|
|
|
key: test_accuracy
|
|
value: [0.98 0.96 0.94 0.92 0.95918367 0.91836735
|
|
1. 0.93877551 0.93877551 0.93877551]
|
|
|
|
mean value: 0.9493877551020409
|
|
|
|
key: train_accuracy
|
|
value: [0.97972973 0.97972973 0.98198198 0.98648649 0.98202247 0.98651685
|
|
0.98426966 0.98202247 0.97977528 0.98651685]
|
|
|
|
mean value: 0.9829051523433546
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 0.96153846 0.94117647 0.92 0.96153846 0.92592593
|
|
1. 0.94117647 0.93877551 0.93877551]
|
|
|
|
mean value: 0.9508498647322177
|
|
|
|
key: train_fscore
|
|
value: [0.98013245 0.98004435 0.98222222 0.98660714 0.98222222 0.98666667
|
|
0.9844098 0.98230088 0.98013245 0.98672566]
|
|
|
|
mean value: 0.9831463848755642
|
|
|
|
key: test_precision
|
|
value: [1. 0.92592593 0.92307692 0.92 0.92592593 0.86206897
|
|
1. 0.88888889 0.92 0.92 ]
|
|
|
|
mean value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
0.9285886629334905
|
|
|
|
key: train_precision
|
|
value: [0.96103896 0.9650655 0.96929825 0.97787611 0.96929825 0.97368421
|
|
0.97356828 0.96943231 0.96521739 0.97379913]
|
|
|
|
mean value: 0.9698278385462152
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 0.96 0.92 1. 1.
|
|
1. 1. 0.95833333 0.95833333]
|
|
|
|
mean value: 0.9756666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 0.9954955 0.9954955 0.9954955 0.9954955 1. 0.9954955
|
|
0.9955157 0.9955157 1. ]
|
|
|
|
mean value: 0.9968508867612007
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.96 0.94 0.92 0.95833333 0.91666667
|
|
1. 0.94 0.93916667 0.93916667]
|
|
|
|
mean value: 0.9493333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.97972973 0.97972973 0.98198198 0.98648649 0.98205268 0.98654709
|
|
0.98429483 0.98199208 0.97973983 0.98648649]
|
|
|
|
mean value: 0.9829040924332405
|
|
|
|
key: test_jcc
|
|
value: [0.96 0.92592593 0.88888889 0.85185185 0.92592593 0.86206897
|
|
1. 0.88888889 0.88461538 0.88461538]
|
|
|
|
mean value: 0.9072781216229492
|
|
|
|
key: train_jcc
|
|
value: [0.96103896 0.96086957 0.9650655 0.97356828 0.9650655 0.97368421
|
|
0.96929825 0.96521739 0.96103896 0.97379913]
|
|
|
|
mean value: 0.9668645747682705
|
|
|
|
MCC on Blind test: 0.93
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02431059 0.01023769 0.01161766 0.01083589 0.01079249 0.0104239
|
|
0.0102613 0.01045012 0.01072311 0.01129961]
|
|
|
|
mean value: 0.012095236778259277
|
|
|
|
key: score_time
|
|
value: [0.01025271 0.0087657 0.00925159 0.00904131 0.00911713 0.00924492
|
|
0.00965071 0.0095737 0.00924301 0.00951958]
|
|
|
|
mean value: 0.009366035461425781
|
|
|
|
key: test_mcc
|
|
value: [0.72524067 0.72057669 0.56407607 0.68887476 0.72844463 0.755
|
|
0.69595532 0.63272208 0.80138769 0.79666667]
|
|
|
|
mean value: 0.7108944579476518
|
|
|
|
key: train_mcc
|
|
value: [0.73079815 0.74850752 0.74823373 0.73948935 0.74440499 0.74037075
|
|
0.71741402 0.74943432 0.73143269 0.74009857]
|
|
|
|
mean value: 0.7390184089399621
|
|
|
|
key: test_accuracy
|
|
value: [0.86 0.86 0.78 0.84 0.85714286 0.87755102
|
|
0.83673469 0.81632653 0.89795918 0.89795918]
|
|
|
|
mean value: 0.8523673469387755
|
|
|
|
key: train_accuracy
|
|
value: [0.86486486 0.87387387 0.87387387 0.86936937 0.87191011 0.86966292
|
|
0.85842697 0.8741573 0.86516854 0.86966292]
|
|
|
|
mean value: 0.8690970746026926
|
|
|
|
key: test_fscore
|
|
value: [0.85106383 0.8627451 0.76595745 0.82608696 0.87272727 0.88
|
|
0.81818182 0.80851064 0.88888889 0.89795918]
|
|
|
|
mean value: 0.8472121132926022
|
|
|
|
key: train_fscore
|
|
value: [0.86111111 0.87096774 0.87155963 0.86635945 0.86896552 0.86574074
|
|
0.85517241 0.87096774 0.86175115 0.86697248]
|
|
|
|
mean value: 0.8659567975927386
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.84615385 0.81818182 0.9047619 0.8 0.88
|
|
0.94736842 0.82608696 0.95238095 0.88 ]
|
|
|
|
mean value: 0.8764024808143801
|
|
|
|
key: train_precision
|
|
value: [0.88571429 0.89150943 0.88785047 0.88679245 0.88732394 0.89047619
|
|
0.87323944 0.8957346 0.88625592 0.88732394]
|
|
|
|
mean value: 0.8872220675543325
|
|
|
|
key: test_recall
|
|
value: [0.8 0.88 0.72 0.76 0.96 0.88
|
|
0.72 0.79166667 0.83333333 0.91666667]
|
|
|
|
mean value: 0.8261666666666667
|
|
|
|
key: train_recall
|
|
value: [0.83783784 0.85135135 0.85585586 0.84684685 0.85135135 0.84234234
|
|
0.83783784 0.84753363 0.83856502 0.84753363]
|
|
|
|
mean value: 0.8457055710418939
|
|
|
|
key: test_roc_auc
|
|
value: [0.86 0.86 0.78 0.84 0.855 0.8775
|
|
0.83916667 0.81583333 0.89666667 0.89833333]
|
|
|
|
mean value: 0.85225
|
|
|
|
key: train_roc_auc
|
|
value: [0.86486486 0.87387387 0.87387387 0.86936937 0.87186402 0.86960166
|
|
0.8583808 0.87421727 0.86522846 0.86971276]
|
|
|
|
mean value: 0.8690986951076637
|
|
|
|
key: test_jcc
|
|
value: [0.74074074 0.75862069 0.62068966 0.7037037 0.77419355 0.78571429
|
|
0.69230769 0.67857143 0.8 0.81481481]
|
|
|
|
mean value: 0.7369356559067348
|
|
|
|
key: train_jcc
|
|
value: [0.75609756 0.77142857 0.77235772 0.76422764 0.76829268 0.76326531
|
|
0.74698795 0.77142857 0.75708502 0.76518219]
|
|
|
|
mean value: 0.7636353217020652
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.19613576 0.06711435 0.06627202 0.07392812 0.06519127 0.07781029
|
|
0.06793761 0.0762372 0.06753373 0.07414198]
|
|
|
|
mean value: 0.08323023319244385
|
|
|
|
key: score_time
|
|
value: [0.0114038 0.01079202 0.01051354 0.01081872 0.01053476 0.01091743
|
|
0.01046109 0.01104116 0.01052165 0.0108633 ]
|
|
|
|
mean value: 0.010786747932434082
|
|
|
|
key: test_mcc
|
|
value: [0.96076892 1. 0.92295821 0.92 0.95993456 0.84757938
|
|
0.96 0.96 0.95993456 0.83666667]
|
|
|
|
mean value: 0.932784229548486
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98 1. 0.96 0.96 0.97959184 0.91836735
|
|
0.97959184 0.97959184 0.97959184 0.91836735]
|
|
|
|
mean value: 0.9655102040816327
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 1. 0.96153846 0.96 0.98039216 0.92592593
|
|
0.97959184 0.97959184 0.9787234 0.91666667]
|
|
|
|
mean value: 0.96620221254532
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.92592593 0.96 0.96153846 0.86206897
|
|
1. 0.96 1. 0.91666667]
|
|
|
|
mean value: 0.9586200019648295
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 1. 0.96 1. 1.
|
|
0.96 1. 0.95833333 0.91666667]
|
|
|
|
mean value: 0.9755
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 1. 0.96 0.96 0.97916667 0.91666667
|
|
0.98 0.98 0.97916667 0.91833333]
|
|
|
|
mean value: 0.9653333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96 1. 0.92592593 0.92307692 0.96153846 0.86206897
|
|
0.96 0.96 0.95833333 0.84615385]
|
|
|
|
mean value: 0.9357097455545731
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04254627 0.08603024 0.04187155 0.10752797 0.07834935 0.04127502
|
|
0.07960081 0.07283115 0.07332063 0.09973025]
|
|
|
|
mean value: 0.07230832576751708
|
|
|
|
key: score_time
|
|
value: [0.02094698 0.01225877 0.0123167 0.02518773 0.01233006 0.01210451
|
|
0.02332997 0.02044463 0.020823 0.02187777]
|
|
|
|
mean value: 0.018162012100219727
|
|
|
|
key: test_mcc
|
|
value: [0.92295821 1. 0.92 0.76991885 0.79666667 0.87813144
|
|
0.96 0.91833333 0.95993456 0.87833333]
|
|
|
|
mean value: 0.9004276394340908
|
|
|
|
key: train_mcc
|
|
value: [0.97756674 0.96412048 0.98202183 0.98202183 0.97761752 0.96854927
|
|
0.96862817 0.96862627 0.97761617 0.97307234]
|
|
|
|
mean value: 0.9739840618243305
|
|
|
|
key: test_accuracy
|
|
value: [0.96 1. 0.96 0.88 0.89795918 0.93877551
|
|
0.97959184 0.95918367 0.97959184 0.93877551]
|
|
|
|
mean value: 0.9493877551020408
|
|
|
|
key: train_accuracy
|
|
value: [0.98873874 0.98198198 0.99099099 0.99099099 0.98876404 0.98426966
|
|
0.98426966 0.98426966 0.98876404 0.98651685]
|
|
|
|
mean value: 0.9869556635286972
|
|
|
|
key: test_fscore
|
|
value: [0.95833333 1. 0.96 0.86956522 0.89795918 0.94117647
|
|
0.97959184 0.95833333 0.9787234 0.93877551]
|
|
|
|
mean value: 0.948245828951377
|
|
|
|
key: train_fscore
|
|
value: [0.98881432 0.98214286 0.99103139 0.99103139 0.98881432 0.98426966
|
|
0.98434004 0.9844098 0.98886414 0.98660714]
|
|
|
|
mean value: 0.9870325065373433
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.96 0.95238095 0.91666667 0.92307692
|
|
1. 0.95833333 1. 0.92 ]
|
|
|
|
mean value: 0.9630457875457875
|
|
|
|
key: train_precision
|
|
value: [0.98222222 0.97345133 0.98660714 0.98660714 0.98222222 0.98206278
|
|
0.97777778 0.97787611 0.98230088 0.98222222]
|
|
|
|
mean value: 0.9813349829011859
|
|
|
|
key: test_recall
|
|
value: [0.92 1. 0.96 0.8 0.88 0.96
|
|
0.96 0.95833333 0.95833333 0.95833333]
|
|
|
|
mean value: 0.9355
|
|
|
|
key: train_recall
|
|
value: [0.9954955 0.99099099 0.9954955 0.9954955 0.9954955 0.98648649
|
|
0.99099099 0.99103139 0.9955157 0.99103139]
|
|
|
|
mean value: 0.9928028925786774
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 1. 0.96 0.88 0.89833333 0.93833333
|
|
0.98 0.95916667 0.97916667 0.93916667]
|
|
|
|
mean value: 0.9494166666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.98873874 0.98198198 0.99099099 0.99099099 0.98877914 0.98427463
|
|
0.98428473 0.98425443 0.98874884 0.98650669]
|
|
|
|
mean value: 0.9869551165515291
|
|
|
|
key: test_jcc
|
|
value: [0.92 1. 0.92307692 0.76923077 0.81481481 0.88888889
|
|
0.96 0.92 0.95833333 0.88461538]
|
|
|
|
mean value: 0.9038960113960114
|
|
|
|
key: train_jcc
|
|
value: [0.97787611 0.96491228 0.98222222 0.98222222 0.97787611 0.96902655
|
|
0.969163 0.96929825 0.97797357 0.97356828]
|
|
|
|
mean value: 0.9744138577637159
|
|
|
|
MCC on Blind test: 0.76
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01816893 0.01003361 0.01011896 0.00955749 0.00958657 0.00953865
|
|
0.00951838 0.00986719 0.00967669 0.0097158 ]
|
|
|
|
mean value: 0.010578227043151856
|
|
|
|
key: score_time
|
|
value: [0.01063776 0.00902462 0.00874734 0.00848818 0.00858426 0.00852966
|
|
0.00851727 0.0086422 0.00865912 0.00855947]
|
|
|
|
mean value: 0.008838987350463868
|
|
|
|
key: test_mcc
|
|
value: [0.84 0.72057669 0.6821865 0.68887476 0.60834499 0.68145382
|
|
0.83666667 0.68353656 0.60834499 0.83973406]
|
|
|
|
mean value: 0.7189719046423098
|
|
|
|
key: train_mcc
|
|
value: [0.73591602 0.73047119 0.79279279 0.77966696 0.73110157 0.7447579
|
|
0.74860079 0.78876881 0.75804378 0.70819365]
|
|
|
|
mean value: 0.7518313479211615
|
|
|
|
key: test_accuracy
|
|
value: [0.92 0.86 0.84 0.84 0.79591837 0.83673469
|
|
0.91836735 0.83673469 0.79591837 0.91836735]
|
|
|
|
mean value: 0.856204081632653
|
|
|
|
key: train_accuracy
|
|
value: [0.86711712 0.86486486 0.8963964 0.88963964 0.86516854 0.87191011
|
|
0.8741573 0.89438202 0.87865169 0.85393258]
|
|
|
|
mean value: 0.8756220265209029
|
|
|
|
key: test_fscore
|
|
value: [0.92 0.8627451 0.84615385 0.82608696 0.82142857 0.85185185
|
|
0.92 0.84615385 0.76190476 0.92 ]
|
|
|
|
mean value: 0.8576324932053833
|
|
|
|
key: train_fscore
|
|
value: [0.87145969 0.86784141 0.8963964 0.89135255 0.86784141 0.87472527
|
|
0.87555556 0.89485459 0.88157895 0.85651214]
|
|
|
|
mean value: 0.8778117965717257
|
|
|
|
key: test_precision
|
|
value: [0.92 0.84615385 0.81481481 0.9047619 0.74193548 0.79310345
|
|
0.92 0.78571429 0.88888889 0.88461538]
|
|
|
|
mean value: 0.8499988057095955
|
|
|
|
key: train_precision
|
|
value: [0.84388186 0.84913793 0.8963964 0.87772926 0.84913793 0.85407725
|
|
0.86403509 0.89285714 0.86266094 0.84347826]
|
|
|
|
mean value: 0.8633392061518267
|
|
|
|
key: test_recall
|
|
value: [0.92 0.88 0.88 0.76 0.92 0.92
|
|
0.92 0.91666667 0.66666667 0.95833333]
|
|
|
|
mean value: 0.8741666666666666
|
|
|
|
key: train_recall
|
|
value: [0.9009009 0.88738739 0.8963964 0.90540541 0.88738739 0.8963964
|
|
0.88738739 0.89686099 0.90134529 0.86995516]
|
|
|
|
mean value: 0.892942269623884
|
|
|
|
key: test_roc_auc
|
|
value: [0.92 0.86 0.84 0.84 0.79333333 0.835
|
|
0.91833333 0.83833333 0.79333333 0.91916667]
|
|
|
|
mean value: 0.85575
|
|
|
|
key: train_roc_auc
|
|
value: [0.86711712 0.86486486 0.8963964 0.88963964 0.86521836 0.87196501
|
|
0.87418697 0.89437644 0.87860057 0.8538965 ]
|
|
|
|
mean value: 0.8756261867248415
|
|
|
|
key: test_jcc
|
|
value: [0.85185185 0.75862069 0.73333333 0.7037037 0.6969697 0.74193548
|
|
0.85185185 0.73333333 0.61538462 0.85185185]
|
|
|
|
mean value: 0.7538836411806379
|
|
|
|
key: train_jcc
|
|
value: [0.77220077 0.76653696 0.8122449 0.804 0.76653696 0.77734375
|
|
0.77865613 0.8097166 0.78823529 0.74903475]
|
|
|
|
mean value: 0.7824506118945939
|
|
|
|
MCC on Blind test: 0.68
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02029681 0.02655125 0.02202678 0.02859473 0.02717113 0.02124548
|
|
0.02417612 0.02469897 0.02194476 0.02474546]
|
|
|
|
mean value: 0.02414515018463135
|
|
|
|
key: score_time
|
|
value: [0.01013517 0.01141882 0.01170516 0.01172137 0.01174331 0.01170993
|
|
0.01177549 0.01177549 0.01173091 0.01174951]
|
|
|
|
mean value: 0.011546516418457031
|
|
|
|
key: test_mcc
|
|
value: [0.88070485 0.96076892 0.84270097 0.92 0.87813144 0.88388348
|
|
0.96 0.87833333 0.91833333 0.92153718]
|
|
|
|
mean value: 0.9044393501586546
|
|
|
|
key: train_mcc
|
|
value: [0.96459047 0.96847829 0.97297297 0.98214142 0.96878538 0.97307343
|
|
0.96854927 0.97753808 0.96439334 0.98652661]
|
|
|
|
mean value: 0.9727049258772413
|
|
|
|
key: test_accuracy
|
|
value: [0.94 0.98 0.92 0.96 0.93877551 0.93877551
|
|
0.97959184 0.93877551 0.95918367 0.95918367]
|
|
|
|
mean value: 0.9514285714285714
|
|
|
|
key: train_accuracy
|
|
value: [0.98198198 0.98423423 0.98648649 0.99099099 0.98426966 0.98651685
|
|
0.98426966 0.98876404 0.98202247 0.99325843]
|
|
|
|
mean value: 0.98627948172892
|
|
|
|
key: test_fscore
|
|
value: [0.93877551 0.98039216 0.92307692 0.96 0.94117647 0.94339623
|
|
0.97959184 0.93877551 0.95833333 0.96 ]
|
|
|
|
mean value: 0.9523517967419188
|
|
|
|
key: train_fscore
|
|
value: [0.98230088 0.98426966 0.98648649 0.99107143 0.9844098 0.98654709
|
|
0.98426966 0.98876404 0.98230088 0.99328859]
|
|
|
|
mean value: 0.9863708531116323
|
|
|
|
key: test_precision
|
|
value: [0.95833333 0.96153846 0.88888889 0.96 0.92307692 0.89285714
|
|
1. 0.92 0.95833333 0.92307692]
|
|
|
|
mean value: 0.9386105006105007
|
|
|
|
key: train_precision
|
|
value: [0.96521739 0.98206278 0.98648649 0.98230088 0.97356828 0.98214286
|
|
0.98206278 0.99099099 0.96943231 0.99107143]
|
|
|
|
mean value: 0.9805336196338786
|
|
|
|
key: test_recall
|
|
value: [0.92 1. 0.96 0.96 0.96 1.
|
|
0.96 0.95833333 0.95833333 1. ]
|
|
|
|
mean value: 0.9676666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 0.98648649 0.98648649 1. 0.9954955 0.99099099
|
|
0.98648649 0.98654709 0.9955157 0.9955157 ]
|
|
|
|
mean value: 0.9923524421282269
|
|
|
|
key: test_roc_auc
|
|
value: [0.94 0.98 0.92 0.96 0.93833333 0.9375
|
|
0.98 0.93916667 0.95916667 0.96 ]
|
|
|
|
mean value: 0.9514166666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.98198198 0.98423423 0.98648649 0.99099099 0.98429483 0.98652689
|
|
0.98427463 0.98876904 0.98199208 0.99325334]
|
|
|
|
mean value: 0.9862804508544419
|
|
|
|
key: test_jcc
|
|
value: [0.88461538 0.96153846 0.85714286 0.92307692 0.88888889 0.89285714
|
|
0.96 0.88461538 0.92 0.92307692]
|
|
|
|
mean value: 0.9095811965811966
|
|
|
|
key: train_jcc
|
|
value: [0.96521739 0.96902655 0.97333333 0.98230088 0.96929825 0.97345133
|
|
0.96902655 0.97777778 0.96521739 0.98666667]
|
|
|
|
mean value: 0.9731316115735021
|
|
|
|
MCC on Blind test: 0.83
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02000856 0.01987481 0.01789331 0.01822662 0.01879525 0.02104878
|
|
0.01870465 0.01874924 0.0184257 0.01906538]
|
|
|
|
mean value: 0.01907923221588135
|
|
|
|
key: score_time
|
|
value: [0.01179743 0.01172423 0.01174998 0.01170969 0.01171947 0.01174664
|
|
0.01167107 0.01174092 0.01167607 0.01172996]
|
|
|
|
mean value: 0.01172654628753662
|
|
|
|
key: test_mcc
|
|
value: [0.92295821 0.92295821 0.88070485 0.84270097 0.87833333 0.88388348
|
|
0.92153718 0.65813164 0.65813164 0.84852814]
|
|
|
|
mean value: 0.8417867635082827
|
|
|
|
key: train_mcc
|
|
value: [0.9687142 0.96396396 0.93727932 0.92977861 0.94640809 0.969021
|
|
0.9462179 0.79248949 0.89508667 0.84542509]
|
|
|
|
mean value: 0.9194384332251933
|
|
|
|
key: test_accuracy
|
|
value: [0.96 0.96 0.94 0.92 0.93877551 0.93877551
|
|
0.95918367 0.81632653 0.81632653 0.91836735]
|
|
|
|
mean value: 0.9167755102040817
|
|
|
|
key: train_accuracy
|
|
value: [0.98423423 0.98198198 0.96846847 0.96396396 0.97303371 0.98426966
|
|
0.97303371 0.88764045 0.94606742 0.91685393]
|
|
|
|
mean value: 0.9579547525053143
|
|
|
|
key: test_fscore
|
|
value: [0.95833333 0.96153846 0.93877551 0.92307692 0.93877551 0.94339623
|
|
0.95833333 0.7804878 0.7804878 0.92307692]
|
|
|
|
mean value: 0.910628183093833
|
|
|
|
key: train_fscore
|
|
value: [0.9844098 0.98198198 0.96803653 0.9650655 0.97260274 0.98447894
|
|
0.97272727 0.875 0.94392523 0.92339545]
|
|
|
|
mean value: 0.9571623440331503
|
|
|
|
key: test_precision
|
|
value: [1. 0.92592593 0.95833333 0.88888889 0.95833333 0.89285714
|
|
1. 0.94117647 0.94117647 0.85714286]
|
|
|
|
mean value: 0.9363834422657952
|
|
|
|
key: train_precision
|
|
value: [0.97356828 0.98198198 0.98148148 0.93644068 0.98611111 0.96943231
|
|
0.98165138 0.98870056 0.98536585 0.85769231]
|
|
|
|
mean value: 0.9642425951358867
|
|
|
|
key: test_recall
|
|
value: [0.92 1. 0.92 0.96 0.92 1.
|
|
0.92 0.66666667 0.66666667 1. ]
|
|
|
|
mean value: 0.8973333333333333
|
|
|
|
key: train_recall
|
|
value: [0.9954955 0.98198198 0.95495495 0.9954955 0.95945946 1.
|
|
0.96396396 0.78475336 0.9058296 1. ]
|
|
|
|
mean value: 0.9541934310992607
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 0.96 0.94 0.92 0.93916667 0.9375
|
|
0.96 0.81333333 0.81333333 0.92 ]
|
|
|
|
mean value: 0.9163333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.98423423 0.98198198 0.96846847 0.96396396 0.97300327 0.98430493
|
|
0.97301337 0.88787218 0.94615804 0.91666667]
|
|
|
|
mean value: 0.9579667111057246
|
|
|
|
key: test_jcc
|
|
value: [0.92 0.92592593 0.88461538 0.85714286 0.88461538 0.89285714
|
|
0.92 0.64 0.64 0.85714286]
|
|
|
|
mean value: 0.8422299552299553
|
|
|
|
key: train_jcc
|
|
value: [0.96929825 0.96460177 0.9380531 0.93248945 0.94666667 0.96943231
|
|
0.94690265 0.77777778 0.89380531 0.85769231]
|
|
|
|
mean value: 0.9196719595496468
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.2051475 0.18799257 0.19177794 0.19366169 0.18941283 0.1898222
|
|
0.19247794 0.1914084 0.19083929 0.1911757 ]
|
|
|
|
mean value: 0.19237160682678223
|
|
|
|
key: score_time
|
|
value: [0.01514149 0.01511407 0.01526618 0.01531124 0.01505971 0.01595616
|
|
0.01512551 0.01527929 0.0151484 0.01520228]
|
|
|
|
mean value: 0.015260434150695801
|
|
|
|
key: test_mcc
|
|
value: [0.92 0.96076892 0.92295821 0.92 0.91833333 0.84757938
|
|
1. 0.91833333 0.96 0.87813144]
|
|
|
|
mean value: 0.9246104616745944
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96 0.98 0.96 0.96 0.95918367 0.91836735
|
|
1. 0.95918367 0.97959184 0.93877551]
|
|
|
|
mean value: 0.9615102040816327
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96 0.97959184 0.96153846 0.96 0.96 0.92592593
|
|
1. 0.95833333 0.97959184 0.93617021]
|
|
|
|
mean value: 0.9621151607033066
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96 1. 0.92592593 0.96 0.96 0.86206897
|
|
1. 0.95833333 0.96 0.95652174]
|
|
|
|
mean value: 0.9542849963906935
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96 0.96 1. 0.96 0.96 1.
|
|
1. 0.95833333 1. 0.91666667]
|
|
|
|
mean value: 0.9715
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 0.98 0.96 0.96 0.95916667 0.91666667
|
|
1. 0.95916667 0.98 0.93833333]
|
|
|
|
mean value: 0.9613333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.92307692 0.96 0.92592593 0.92307692 0.92307692 0.86206897
|
|
1. 0.92 0.96 0.88 ]
|
|
|
|
mean value: 0.9277225660673937
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.87
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05800104 0.07738423 0.07909322 0.07274413 0.06361341 0.0714252
|
|
0.08117342 0.0664916 0.07769418 0.06700635]
|
|
|
|
mean value: 0.07146267890930176
|
|
|
|
key: score_time
|
|
value: [0.02654862 0.03917718 0.02118254 0.02610636 0.03847408 0.03358889
|
|
0.02448678 0.02260137 0.02334237 0.04202318]
|
|
|
|
mean value: 0.02975313663482666
|
|
|
|
key: test_mcc
|
|
value: [0.96076892 0.92 0.96076892 0.92 0.83973406 0.88388348
|
|
0.96 0.91833333 0.91833333 0.87813144]
|
|
|
|
mean value: 0.9159953487885168
|
|
|
|
key: train_mcc
|
|
value: [0.97756674 0.99103121 0.99099099 0.97756674 0.98652661 0.99105141
|
|
0.98652661 0.99105141 0.97753808 0.9955157 ]
|
|
|
|
mean value: 0.9865365498369552
|
|
|
|
key: test_accuracy
|
|
value: [0.98 0.96 0.98 0.96 0.91836735 0.93877551
|
|
0.97959184 0.95918367 0.95918367 0.93877551]
|
|
|
|
mean value: 0.9573877551020408
|
|
|
|
key: train_accuracy
|
|
value: [0.98873874 0.9954955 0.9954955 0.98873874 0.99325843 0.99550562
|
|
0.99325843 0.99550562 0.98876404 0.99775281]
|
|
|
|
mean value: 0.9932513412288694
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 0.96 0.98039216 0.96 0.91666667 0.94339623
|
|
0.97959184 0.95833333 0.95833333 0.93617021]
|
|
|
|
mean value: 0.9572475602846517
|
|
|
|
key: train_fscore
|
|
value: [0.98881432 0.9955157 0.9954955 0.98866213 0.99322799 0.9955157
|
|
0.99322799 0.9954955 0.98876404 0.99775281]
|
|
|
|
mean value: 0.9932471666192065
|
|
|
|
key: test_precision
|
|
value: [1. 0.96 0.96153846 0.96 0.95652174 0.89285714
|
|
1. 0.95833333 0.95833333 0.95652174]
|
|
|
|
mean value: 0.9604105749323141
|
|
|
|
key: train_precision
|
|
value: [0.98222222 0.99107143 0.9954955 0.99543379 0.99547511 0.99107143
|
|
0.99547511 1. 0.99099099 1. ]
|
|
|
|
mean value: 0.9937235582050248
|
|
|
|
key: test_recall
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[0.96 0.96 1. 0.96 0.88 1.
|
|
0.96 0.95833333 0.95833333 0.91666667]
|
|
|
|
mean value: 0.9553333333333334
|
|
|
|
key: train_recall
|
|
value: [0.9954955 1. 0.9954955 0.98198198 0.99099099 1.
|
|
0.99099099 0.99103139 0.98654709 0.9955157 ]
|
|
|
|
mean value: 0.9928049125358542
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.96 0.98 0.96 0.91916667 0.9375
|
|
0.98 0.95916667 0.95916667 0.93833333]
|
|
|
|
mean value: 0.9573333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.98873874 0.9954955 0.9954955 0.98873874 0.99325334 0.9955157
|
|
0.99325334 0.9955157 0.98876904 0.99775785]
|
|
|
|
mean value: 0.9932533430291278
|
|
|
|
key: test_jcc
|
|
value: [0.96 0.92307692 0.96153846 0.92307692 0.84615385 0.89285714
|
|
0.96 0.92 0.92 0.88 ]
|
|
|
|
mean value: 0.9186703296703297
|
|
|
|
key: train_jcc
|
|
value: [0.97787611 0.99107143 0.99103139 0.97757848 0.98654709 0.99107143
|
|
0.98654709 0.99103139 0.97777778 0.9955157 ]
|
|
|
|
mean value: 0.9866047862191558
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.1462822 0.17134476 0.15874028 0.18130159 0.19656086 0.16752148
|
|
0.18994427 0.0985918 0.11454821 0.15534019]
|
|
|
|
mean value: 0.15801756381988524
|
|
|
|
key: score_time
|
|
value: [0.02414179 0.02950263 0.0275178 0.02477813 0.02685452 0.02670765
|
|
0.02468276 0.01489377 0.01492238 0.02472472]
|
|
|
|
mean value: 0.02387261390686035
|
|
|
|
key: test_mcc
|
|
value: [0.72057669 0.76244374 0.64051262 0.64051262 0.55390031 0.7145252
|
|
0.715 0.59166667 0.6446564 0.83973406]
|
|
|
|
mean value: 0.6823528296330293
|
|
|
|
key: train_mcc
|
|
value: [0.97748739 0.97313095 0.97301246 0.97756674 0.98660607 0.98202238
|
|
0.99101119 0.98652689 0.96420203 0.97753808]
|
|
|
|
mean value: 0.9789104172301524
|
|
|
|
key: test_accuracy
|
|
value: [0.86 0.88 0.82 0.82 0.7755102 0.85714286
|
|
0.85714286 0.79591837 0.81632653 0.91836735]
|
|
|
|
mean value: 0.8400408163265306
|
|
|
|
key: train_accuracy
|
|
value: [0.98873874 0.98648649 0.98648649 0.98873874 0.99325843 0.99101124
|
|
0.99550562 0.99325843 0.98202247 0.98876404]
|
|
|
|
mean value: 0.9894270675169552
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.875 0.82352941 0.82352941 0.79245283 0.8627451
|
|
0.85714286 0.79166667 0.79069767 0.92 ]
|
|
|
|
mean value: 0.8393906807128292
|
|
|
|
key: train_fscore
|
|
value: [0.98871332 0.98636364 0.98642534 0.98866213 0.99319728 0.99099099
|
|
0.9954955 0.99325843 0.98190045 0.98876404]
|
|
|
|
mean value: 0.9893771115330703
|
|
|
|
key: test_precision
|
|
value: [0.875 0.91304348 0.80769231 0.80769231 0.75 0.84615385
|
|
0.875 0.79166667 0.89473684 0.88461538]
|
|
|
|
mean value: 0.8445600833186645
|
|
|
|
key: train_precision
|
|
value: [0.99095023 0.99541284 0.99090909 0.99543379 1. 0.99099099
|
|
0.9954955 0.9954955 0.99086758 0.99099099]
|
|
|
|
mean value: 0.9936546504026119
|
|
|
|
key: test_recall
|
|
value: [0.84 0.84 0.84 0.84 0.84 0.88
|
|
0.84 0.79166667 0.70833333 0.95833333]
|
|
|
|
mean value: 0.8378333333333333
|
|
|
|
key: train_recall
|
|
value: [0.98648649 0.97747748 0.98198198 0.98198198 0.98648649 0.99099099
|
|
0.9954955 0.99103139 0.97309417 0.98654709]
|
|
|
|
mean value: 0.9851573546640812
|
|
|
|
key: test_roc_auc
|
|
value: [0.86 0.88 0.82 0.82 0.77416667 0.85666667
|
|
0.8575 0.79583333 0.81416667 0.91916667]
|
|
|
|
mean value: 0.83975
|
|
|
|
key: train_roc_auc
|
|
value: [0.98873874 0.98648649 0.98648649 0.98873874 0.99324324 0.99101119
|
|
0.9955056 0.99326344 0.98204258 0.98876904]
|
|
|
|
mean value: 0.9894285541146528
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.77777778 0.7 0.7 0.65625 0.75862069
|
|
0.75 0.65517241 0.65384615 0.85185185]
|
|
|
|
mean value: 0.725351888692406
|
|
|
|
key: train_jcc
|
|
value: [0.97767857 0.97309417 0.97321429 0.97757848 0.98648649 0.98214286
|
|
0.99103139 0.98660714 0.96444444 0.97777778]
|
|
|
|
mean value: 0.9790055601726005
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.74349904 0.74846554 0.76858521 0.77263737 0.8602097 0.77435875
|
|
0.77104759 0.76435328 0.76286674 0.74993825]
|
|
|
|
mean value: 0.7715961456298828
|
|
|
|
key: score_time
|
|
value: [0.00955248 0.00944972 0.00906014 0.00950265 0.01088953 0.00938582
|
|
0.00932217 0.00925708 0.00946403 0.00925159]
|
|
|
|
mean value: 0.009513521194458007
|
|
|
|
key: test_mcc
|
|
value: [0.92 1. 0.92295821 0.92 0.83973406 0.81223286
|
|
0.96 0.96 0.95993456 0.83666667]
|
|
|
|
mean value: 0.9131526353802787
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96 1. 0.96 0.96 0.91836735 0.89795918
|
|
0.97959184 0.97959184 0.97959184 0.91836735]
|
|
|
|
mean value: 0.9553469387755102
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96 1. 0.96153846 0.96 0.91666667 0.90909091
|
|
0.97959184 0.97959184 0.9787234 0.91666667]
|
|
|
|
mean value: 0.9561869781687411
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96 1. 0.92592593 0.96 0.95652174 0.83333333
|
|
1. 0.96 1. 0.91666667]
|
|
|
|
mean value: 0.951244766505636
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 1. 0.96 0.88 1.
|
|
0.96 1. 0.95833333 0.91666667]
|
|
|
|
mean value: 0.9635
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 1. 0.96 0.96 0.91916667 0.89583333
|
|
0.98 0.98 0.97916667 0.91833333]
|
|
|
|
mean value: 0.9552499999999999
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.92307692 1. 0.92592593 0.92307692 0.84615385 0.83333333
|
|
0.96 0.96 0.95833333 0.84615385]
|
|
|
|
mean value: 0.9176054131054131
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.88
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03106737 0.02843428 0.05064511 0.03269029 0.03245735 0.0472188
|
|
0.03142047 0.02927351 0.02921271 0.04299045]
|
|
|
|
mean value: 0.03554103374481201
|
|
|
|
key: score_time
|
|
value: [0.01255679 0.01286769 0.02034068 0.01433754 0.01428699 0.02095914
|
|
0.01433921 0.01431465 0.0141778 0.01422095]
|
|
|
|
mean value: 0.015240144729614259
|
|
|
|
key: test_mcc
|
|
value: [0.80582296 0.76991885 0.52678658 0.6333005 0.69302938 0.6750504
|
|
0.7645166 0.76603235 0.69302938 0.55612092]
|
|
|
|
mean value: 0.688360793356357
|
|
|
|
key: train_mcc
|
|
value: [0.88039033 0.85030451 0.94629162 0.91124214 0.86216422 0.93267075
|
|
0.88329294 0.94319127 0.89244472 0.83774668]
|
|
|
|
mean value: 0.8939739173612107
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.88 0.76 0.8 0.83673469 0.83673469
|
|
0.87755102 0.87755102 0.83673469 0.7755102 ]
|
|
|
|
mean value: 0.8380816326530612
|
|
|
|
key: train_accuracy
|
|
value: [0.93918919 0.92342342 0.97297297 0.95495495 0.93033708 0.96629213
|
|
0.94157303 0.97078652 0.94382022 0.91235955]
|
|
|
|
mean value: 0.9455709079866383
|
|
|
|
key: test_fscore
|
|
value: [0.90566038 0.88888889 0.77777778 0.82758621 0.85714286 0.84615385
|
|
0.88888889 0.88461538 0.80952381 0.78431373]
|
|
|
|
mean value: 0.8470551762736691
|
|
|
|
key: train_fscore
|
|
value: [0.94117647 0.92672414 0.97260274 0.95614035 0.9321663 0.96644295
|
|
0.94196429 0.96997691 0.94089835 0.9041769 ]
|
|
|
|
mean value: 0.9452269394468622
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.82758621 0.72413793 0.72727273 0.77419355 0.81481481
|
|
0.82758621 0.82142857 0.94444444 0.74074074]
|
|
|
|
mean value: 0.8059348049058839
|
|
|
|
key: train_precision
|
|
value: [0.91139241 0.88842975 0.98611111 0.93162393 0.90638298 0.96
|
|
0.93362832 1. 0.995 1. ]
|
|
|
|
mean value: 0.9512568497171925
|
|
|
|
key: test_recall
|
|
value: [0.96 0.96 0.84 0.96 0.96 0.88
|
|
0.96 0.95833333 0.70833333 0.83333333]
|
|
|
|
mean value: 0.902
|
|
|
|
key: train_recall
|
|
value: [0.97297297 0.96846847 0.95945946 0.98198198 0.95945946 0.97297297
|
|
0.95045045 0.94170404 0.89237668 0.82511211]
|
|
|
|
mean value: 0.9424958590877873
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.88 0.76 0.8 0.83416667 0.83583333
|
|
0.87583333 0.87916667 0.83416667 0.77666667]
|
|
|
|
mean value: 0.8375833333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.93918919 0.92342342 0.97297297 0.95495495 0.93040238 0.96630711
|
|
0.94159294 0.97085202 0.94393609 0.91255605]
|
|
|
|
mean value: 0.9456187128832869
|
|
|
|
key: test_jcc
|
|
value: [0.82758621 0.8 0.63636364 0.70588235 0.75 0.73333333
|
|
0.8 0.79310345 0.68 0.64516129]
|
|
|
|
mean value: 0.7371430268133141
|
|
|
|
key: train_jcc
|
|
value: [0.88888889 0.86345382 0.94666667 0.91596639 0.87295082 0.93506494
|
|
0.89029536 0.94170404 0.88839286 0.82511211]
|
|
|
|
mean value: 0.8968495871398692
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02365279 0.03753495 0.03749871 0.03739238 0.04007196 0.0428896
|
|
0.03537941 0.03756166 0.03808331 0.03779888]
|
|
|
|
mean value: 0.036786365509033206
|
|
|
|
key: score_time
|
|
value: [0.01868844 0.02436852 0.01854777 0.01862192 0.02282 0.0207479
|
|
0.01206541 0.02361345 0.02306271 0.02324009]
|
|
|
|
mean value: 0.020577621459960938
|
|
|
|
key: test_mcc
|
|
value: [0.96076892 0.96076892 0.92295821 0.84270097 0.83920658 0.92128466
|
|
1. 0.87813144 0.91833333 0.92153718]
|
|
|
|
mean value: 0.9165690215392522
|
|
|
|
key: train_mcc
|
|
value: [0.96431623 0.9597029 0.96412048 0.96412048 0.95979475 0.96420203
|
|
0.95979475 0.96862627 0.96439334 0.96862627]
|
|
|
|
mean value: 0.9637697486422239
|
|
|
|
key: test_accuracy
|
|
value: [0.98 0.98 0.96 0.92 0.91836735 0.95918367
|
|
1. 0.93877551 0.95918367 0.95918367]
|
|
|
|
mean value: 0.957469387755102
|
|
|
|
key: train_accuracy
|
|
value: [0.98198198 0.97972973 0.98198198 0.98198198 0.97977528 0.98202247
|
|
0.97977528 0.98426966 0.98202247 0.98426966]
|
|
|
|
mean value: 0.981781050713635
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 0.98039216 0.96153846 0.91666667 0.92307692 0.96153846
|
|
1. 0.93617021 0.95833333 0.96 ]
|
|
|
|
mean value: 0.9577308052517243
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_8020.py:128: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_8020.py:131: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.98222222 0.97995546 0.98214286 0.98214286 0.97995546 0.98214286
|
|
0.97995546 0.9844098 0.98230088 0.9844098 ]
|
|
|
|
mean value: 0.9819637647426145
|
|
|
|
key: test_precision
|
|
value: [1. 0.96153846 0.92592593 0.95652174 0.88888889 0.92592593
|
|
1. 0.95652174 0.95833333 0.92307692]
|
|
|
|
mean value: 0.9496732936950328
|
|
|
|
key: train_precision
|
|
value: [0.96929825 0.969163 0.97345133 0.97345133 0.969163 0.97345133
|
|
0.969163 0.97787611 0.96943231 0.97787611]
|
|
|
|
mean value: 0.9722325741498922
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 1. 0.88 0.96 1.
|
|
1. 0.91666667 0.95833333 1. ]
|
|
|
|
mean value: 0.9675
|
|
|
|
key: train_recall
|
|
value: [0.9954955 0.99099099 0.99099099 0.99099099 0.99099099 0.99099099
|
|
0.99099099 0.99103139 0.9955157 0.99103139]
|
|
|
|
mean value: 0.9919019916777764
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.98 0.96 0.92 0.9175 0.95833333
|
|
1. 0.93833333 0.95916667 0.96 ]
|
|
|
|
mean value: 0.9573333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.98198198 0.97972973 0.98198198 0.98198198 0.97980043 0.98204258
|
|
0.97980043 0.98425443 0.98199208 0.98425443]
|
|
|
|
mean value: 0.9817820062214682
|
|
|
|
key: test_jcc
|
|
value: [0.96 0.96153846 0.92592593 0.84615385 0.85714286 0.92592593
|
|
1. 0.88 0.92 0.92307692]
|
|
|
|
mean value: 0.919976393976394
|
|
|
|
key: train_jcc
|
|
value: [0.9650655 0.96069869 0.96491228 0.96491228 0.96069869 0.96491228
|
|
0.96069869 0.96929825 0.96521739 0.96929825]
|
|
|
|
mean value: 0.9645712296690083
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.26051164 0.27078271 0.26869369 0.29507327 0.313694 0.2837832
|
|
0.27566242 0.27053499 0.30342627 0.28404856]
|
|
|
|
mean value: 0.28262107372283934
|
|
|
|
key: score_time
|
|
value: [0.0187788 0.01875639 0.01866913 0.01870775 0.02089095 0.02177954
|
|
0.02266693 0.02196908 0.02367878 0.02244806]
|
|
|
|
mean value: 0.02083454132080078
|
|
|
|
key: test_mcc
|
|
value: [0.92295821 1. 0.92295821 0.80582296 0.83920658 0.87813144
|
|
1. 0.87813144 0.95993456 0.92153718]
|
|
|
|
mean value: 0.912868057229365
|
|
|
|
key: train_mcc
|
|
value: [0.9687142 0.96412048 0.96412048 0.96855691 0.95979475 0.96420203
|
|
0.95979475 0.96862627 0.96878225 0.96862627]
|
|
|
|
mean value: 0.9655338374704798
|
|
|
|
key: test_accuracy
|
|
value: [0.96 1. 0.96 0.9 0.91836735 0.93877551
|
|
1. 0.93877551 0.97959184 0.95918367]
|
|
|
|
mean value: 0.955469387755102
|
|
|
|
key: train_accuracy
|
|
value: [0.98423423 0.98198198 0.98198198 0.98423423 0.97977528 0.98202247
|
|
0.97977528 0.98426966 0.98426966 0.98426966]
|
|
|
|
mean value: 0.9826814454904342
|
|
|
|
key: test_fscore
|
|
value: [0.95833333 1. 0.96153846 0.89361702 0.92307692 0.94117647
|
|
1. 0.93617021 0.9787234 0.96 ]
|
|
|
|
mean value: 0.9552635826834825
|
|
|
|
key: train_fscore
|
|
value: [0.9844098 0.98214286 0.98214286 0.98434004 0.97995546 0.98214286
|
|
0.97995546 0.9844098 0.98447894 0.9844098 ]
|
|
|
|
mean value: 0.9828387863673758
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.92592593 0.95454545 0.88888889 0.92307692
|
|
1. 0.95652174 1. 0.92307692]
|
|
|
|
mean value: 0.957203585464455
|
|
|
|
key: train_precision
|
|
value: [0.97356828 0.97345133 0.97345133 0.97777778 0.969163 0.97345133
|
|
0.969163 0.97787611 0.97368421 0.97787611]
|
|
|
|
mean value: 0.9739462456122112
|
|
|
|
key: test_recall
|
|
value: [0.92 1. 1. 0.84 0.96 0.96
|
|
1. 0.91666667 0.95833333 1. ]
|
|
|
|
mean value: 0.9555
|
|
|
|
key: train_recall
|
|
value: [0.9954955 0.99099099 0.99099099 0.99099099 0.99099099 0.99099099
|
|
0.99099099 0.99103139 0.9955157 0.99103139]
|
|
|
|
mean value: 0.9919019916777764
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 1. 0.96 0.9 0.9175 0.93833333
|
|
1. 0.93833333 0.97916667 0.96 ]
|
|
|
|
mean value: 0.9553333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.98423423 0.98198198 0.98198198 0.98423423 0.97980043 0.98204258
|
|
0.97980043 0.98425443 0.98424433 0.98425443]
|
|
|
|
mean value: 0.982682907122369
|
|
|
|
key: test_jcc
|
|
value: [0.92 1. 0.92592593 0.80769231 0.85714286 0.88888889
|
|
1. 0.88 0.95833333 0.92307692]
|
|
|
|
mean value: 0.9161060236060237
|
|
|
|
key: train_jcc
|
|
value: [0.96929825 0.96491228 0.96491228 0.969163 0.96069869 0.96491228
|
|
0.96069869 0.96929825 0.96943231 0.96929825]
|
|
|
|
mean value: 0.9662624268865226
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02755475 0.03678417 0.0360043 0.03641462 0.03691816 0.03945303
|
|
0.03611469 0.03536558 0.03675437 0.03592873]
|
|
|
|
mean value: 0.03572924137115478
|
|
|
|
key: score_time
|
|
value: [0.0120101 0.01210594 0.01322579 0.01349235 0.01350141 0.01210308
|
|
0.01323247 0.01334023 0.01318455 0.01211023]
|
|
|
|
mean value: 0.012830615043640137
|
|
|
|
key: test_mcc
|
|
value: [0.80064077 0.96076892 0.84270097 0.60192927 0.83920658 0.88388348
|
|
0.83666667 0.79666667 0.6750504 0.88443328]
|
|
|
|
mean value: 0.8121946991120359
|
|
|
|
key: train_mcc
|
|
value: [0.87910109 0.86980187 0.91486933 0.88320552 0.87038632 0.87887912
|
|
0.87473234 0.87471218 0.88402716 0.88835685]
|
|
|
|
mean value: 0.8818071779685382
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.98 0.92 0.8 0.91836735 0.93877551
|
|
0.91836735 0.89795918 0.83673469 0.93877551]
|
|
|
|
mean value: 0.9048979591836734
|
|
|
|
key: train_accuracy
|
|
value: [0.93918919 0.93468468 0.95720721 0.94144144 0.93483146 0.93932584
|
|
0.93707865 0.93707865 0.94157303 0.94382022]
|
|
|
|
mean value: 0.9406230387691061
|
|
|
|
key: test_fscore
|
|
value: [0.89795918 0.98039216 0.92307692 0.79166667 0.92307692 0.94339623
|
|
0.92 0.89795918 0.82608696 0.94117647]
|
|
|
|
mean value: 0.9044790690555266
|
|
|
|
key: train_fscore
|
|
value: [0.94039735 0.93569845 0.9578714 0.94222222 0.93598234 0.93986637
|
|
0.9380531 0.93832599 0.94298246 0.94505495]
|
|
|
|
mean value: 0.941645461740113
|
|
|
|
key: test_precision
|
|
value: [0.91666667 0.96153846 0.88888889 0.82608696 0.88888889 0.89285714
|
|
0.92 0.88 0.86363636 0.88888889]
|
|
|
|
mean value: 0.8927452257887041
|
|
|
|
key: train_precision
|
|
value: [0.92207792 0.92139738 0.94323144 0.92982456 0.91774892 0.92951542
|
|
0.92173913 0.92207792 0.92274678 0.92672414]
|
|
|
|
mean value: 0.9257083612252869
|
|
|
|
key: test_recall
|
|
value: [0.88 1. 0.96 0.76 0.96 1.
|
|
0.92 0.91666667 0.79166667 1. ]
|
|
|
|
mean value: 0.9188333333333333
|
|
|
|
key: train_recall
|
|
value: [0.95945946 0.95045045 0.97297297 0.95495495 0.95495495 0.95045045
|
|
0.95495495 0.95515695 0.96412556 0.96412556]
|
|
|
|
mean value: 0.9581606269947077
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.98 0.92 0.8 0.9175 0.9375
|
|
0.91833333 0.89833333 0.83583333 0.94 ]
|
|
|
|
mean value: 0.9047499999999999
|
|
|
|
key: train_roc_auc
|
|
value: [0.93918919 0.93468468 0.95720721 0.94144144 0.93487658 0.93935079
|
|
0.93711873 0.93703793 0.94152224 0.94377449]
|
|
|
|
mean value: 0.9406203288490284
|
|
|
|
key: test_jcc
|
|
value: [0.81481481 0.96153846 0.85714286 0.65517241 0.85714286 0.89285714
|
|
0.85185185 0.81481481 0.7037037 0.88888889]
|
|
|
|
mean value: 0.8297927806548496
|
|
|
|
key: train_jcc
|
|
value: [0.8875 0.87916667 0.91914894 0.8907563 0.87966805 0.88655462
|
|
0.88333333 0.88381743 0.89211618 0.89583333]
|
|
|
|
mean value: 0.8897894853624331
|
|
|
|
MCC on Blind test: 0.77
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.80070734 0.93604183 0.80317092 0.7838819 0.96816945 0.80117154
|
|
0.89360476 0.85581207 0.78868461 0.88255024]
|
|
|
|
mean value: 0.8513794660568237
|
|
|
|
key: score_time
|
|
value: [0.01340437 0.01333737 0.01344609 0.01338077 0.01332951 0.01321507
|
|
0.0133996 0.01334405 0.01338911 0.01325583]
|
|
|
|
mean value: 0.013350176811218261
|
|
|
|
key: test_mcc
|
|
value: [0.92295821 0.88070485 0.80064077 0.88640526 0.92153718 0.92128466
|
|
0.96 0.79632832 0.92128466 0.96 ]
|
|
|
|
mean value: 0.8971143909421725
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96 0.94 0.9 0.94 0.95918367 0.95918367
|
|
0.97959184 0.89795918 0.95918367 0.97959184]
|
|
|
|
mean value: 0.947469387755102
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95833333 0.93877551 0.90196078 0.93617021 0.95833333 0.96153846
|
|
0.97959184 0.89361702 0.95652174 0.97959184]
|
|
|
|
mean value: 0.9464434069365311
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.95833333 0.88461538 1. 1. 0.92592593
|
|
1. 0.91304348 1. 0.96 ]
|
|
|
|
mean value: 0.9641918122135513
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.92 0.92 0.92 0.88 0.92 1.
|
|
0.96 0.875 0.91666667 1. ]
|
|
|
|
mean value: 0.9311666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 0.94 0.9 0.94 0.96 0.95833333
|
|
0.98 0.8975 0.95833333 0.98 ]
|
|
|
|
mean value: 0.9474166666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.92 0.88461538 0.82142857 0.88 0.92 0.92592593
|
|
0.96 0.80769231 0.91666667 0.96 ]
|
|
|
|
mean value: 0.8996328856328857
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.78
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01465964 0.01194119 0.01084161 0.00987864 0.00998402 0.01108885
|
|
0.01007175 0.01049519 0.01010418 0.01023102]
|
|
|
|
mean value: 0.010929608345031738
|
|
|
|
key: score_time
|
|
value: [0.01477027 0.00942087 0.00908589 0.008955 0.00895452 0.00905156
|
|
0.00886488 0.00900698 0.00931907 0.00914359]
|
|
|
|
mean value: 0.00965726375579834
|
|
|
|
key: test_mcc
|
|
value: [0.52167203 0.44574249 0.52678658 0.60783067 0.64549722 0.65813164
|
|
0.7145252 0.63272208 0.51 0.43604918]
|
|
|
|
mean value: 0.5698957114099669
|
|
|
|
key: train_mcc
|
|
value: [0.62324241 0.5533577 0.66153482 0.6428038 0.60182525 0.62886607
|
|
0.61235657 0.58714971 0.62492453 0.56698027]
|
|
|
|
mean value: 0.6103041118724186
|
|
|
|
key: test_accuracy
|
|
value: [0.76 0.72 0.76 0.8 0.79591837 0.81632653
|
|
0.85714286 0.81632653 0.75510204 0.71428571]
|
|
|
|
mean value: 0.7795102040816326
|
|
|
|
key: train_accuracy
|
|
value: [0.81081081 0.77477477 0.82882883 0.81981982 0.8 0.81348315
|
|
0.80449438 0.79325843 0.81123596 0.78202247]
|
|
|
|
mean value: 0.8038728616256706
|
|
|
|
key: test_fscore
|
|
value: [0.76923077 0.74074074 0.77777778 0.7826087 0.83333333 0.84210526
|
|
0.8627451 0.80851064 0.75 0.73076923]
|
|
|
|
mean value: 0.7897821546999009
|
|
|
|
key: train_fscore
|
|
value: [0.8173913 0.78723404 0.83760684 0.82832618 0.80694143 0.81995662
|
|
0.8137045 0.78899083 0.81974249 0.79317697]
|
|
|
|
mean value: 0.8113071196515627
|
|
|
|
key: test_precision
|
|
value: [0.74074074 0.68965517 0.72413793 0.85714286 0.71428571 0.75
|
|
0.84615385 0.82608696 0.75 0.67857143]
|
|
|
|
mean value: 0.7576774646864601
|
|
|
|
key: train_precision
|
|
value: [0.78991597 0.74596774 0.79674797 0.79098361 0.77824268 0.79079498
|
|
0.7755102 0.80751174 0.78600823 0.75609756]
|
|
|
|
mean value: 0.7817780671861976
|
|
|
|
key: test_recall
|
|
value: [0.8 0.8 0.84 0.72 1. 0.96
|
|
0.88 0.79166667 0.75 0.79166667]
|
|
|
|
mean value: 0.8333333333333334
|
|
|
|
key: train_recall
|
|
value: [0.84684685 0.83333333 0.88288288 0.86936937 0.83783784 0.85135135
|
|
0.85585586 0.77130045 0.85650224 0.83408072]
|
|
|
|
mean value: 0.8439360885549226
|
|
|
|
key: test_roc_auc
|
|
value: [0.76 0.72 0.76 0.8 0.79166667 0.81333333
|
|
0.85666667 0.81583333 0.755 0.71583333]
|
|
|
|
mean value: 0.7788333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.81081081 0.77477477 0.82882883 0.81981982 0.80008484 0.81356805
|
|
0.80460954 0.79330788 0.811134 0.78190522]
|
|
|
|
mean value: 0.8038843776511938
|
|
|
|
key: test_jcc
|
|
value: [0.625 0.58823529 0.63636364 0.64285714 0.71428571 0.72727273
|
|
0.75862069 0.67857143 0.6 0.57575758]
|
|
|
|
mean value: 0.6546964208881044
|
|
|
|
key: train_jcc
|
|
value: [0.69117647 0.64912281 0.72058824 0.70695971 0.67636364 0.69485294
|
|
0.68592058 0.65151515 0.69454545 0.65724382]
|
|
|
|
mean value: 0.6828288797332063
|
|
|
|
MCC on Blind test: 0.51
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01068377 0.01055551 0.01216602 0.01032829 0.01093745 0.01072049
|
|
0.01034594 0.01028442 0.01040578 0.01030898]
|
|
|
|
mean value: 0.01067366600036621
|
|
|
|
key: score_time
|
|
value: [0.00947642 0.00903702 0.00938368 0.0091145 0.00900269 0.00949693
|
|
0.00891304 0.0089314 0.00915146 0.00891685]
|
|
|
|
mean value: 0.009142398834228516
|
|
|
|
key: test_mcc
|
|
value: [0.60192927 0.40032038 0.52 0.56044854 0.65813164 0.59839104
|
|
0.51252158 0.59297231 0.63272208 0.6750504 ]
|
|
|
|
mean value: 0.5752487245999452
|
|
|
|
key: train_mcc
|
|
value: [0.63963964 0.66233017 0.67117798 0.65768435 0.62845024 0.63596332
|
|
0.64046624 0.64048076 0.63166622 0.66292294]
|
|
|
|
mean value: 0.6470781844201794
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.7 0.76 0.78 0.81632653 0.79591837
|
|
0.75510204 0.79591837 0.81632653 0.83673469]
|
|
|
|
mean value: 0.7856326530612245
|
|
|
|
key: train_accuracy
|
|
value: [0.81981982 0.83108108 0.83558559 0.82882883 0.81348315 0.81797753
|
|
0.82022472 0.82022472 0.81573034 0.83146067]
|
|
|
|
mean value: 0.8234416438910821
|
|
|
|
key: test_fscore
|
|
value: [0.79166667 0.70588235 0.76 0.7755102 0.84210526 0.81481481
|
|
0.75 0.7826087 0.80851064 0.82608696]
|
|
|
|
mean value: 0.7857185592133971
|
|
|
|
key: train_fscore
|
|
value: [0.81981982 0.83296214 0.83521445 0.8280543 0.81917211 0.81797753
|
|
0.81900452 0.81981982 0.81858407 0.83221477]
|
|
|
|
mean value: 0.824282352548306
|
|
|
|
key: test_precision
|
|
value: [0.82608696 0.69230769 0.76 0.79166667 0.75 0.75862069
|
|
0.7826087 0.81818182 0.82608696 0.86363636]
|
|
|
|
mean value: 0.7869195839143366
|
|
|
|
key: train_precision
|
|
value: [0.81981982 0.82378855 0.83710407 0.83181818 0.79324895 0.8161435
|
|
0.82272727 0.82352941 0.80786026 0.83035714]
|
|
|
|
mean value: 0.820639715255508
|
|
|
|
key: test_recall
|
|
value: [0.76 0.72 0.76 0.76 0.96 0.88
|
|
0.72 0.75 0.79166667 0.79166667]
|
|
|
|
mean value: 0.7893333333333333
|
|
|
|
key: train_recall
|
|
value: [0.81981982 0.84234234 0.83333333 0.82432432 0.84684685 0.81981982
|
|
0.81531532 0.8161435 0.82959641 0.83408072]
|
|
|
|
mean value: 0.8281622429604493
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.7 0.76 0.78 0.81333333 0.79416667
|
|
0.75583333 0.795 0.81583333 0.83583333]
|
|
|
|
mean value: 0.7849999999999999
|
|
|
|
key: train_roc_auc
|
|
value: [0.81981982 0.83108108 0.83558559 0.82882883 0.81355795 0.81798166
|
|
0.82021371 0.82023391 0.81569911 0.83145477]
|
|
|
|
mean value: 0.8234456429523694
|
|
|
|
key: test_jcc
|
|
value: [0.65517241 0.54545455 0.61290323 0.63333333 0.72727273 0.6875
|
|
0.6 0.64285714 0.67857143 0.7037037 ]
|
|
|
|
mean value: 0.6486768520792436
|
|
|
|
key: train_jcc
|
|
value: [0.69465649 0.71374046 0.71705426 0.70656371 0.69372694 0.69201521
|
|
0.69348659 0.69465649 0.6928839 0.71264368]
|
|
|
|
mean value: 0.7011427714969269
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01042533 0.01071596 0.00960279 0.01084709 0.01065874 0.00931907
|
|
0.01050949 0.01033163 0.01045561 0.00955653]
|
|
|
|
mean value: 0.010242223739624023
|
|
|
|
key: score_time
|
|
value: [0.01264215 0.01632237 0.01234102 0.01228786 0.0191865 0.0182519
|
|
0.02154088 0.01270795 0.0116992 0.01153326]
|
|
|
|
mean value: 0.01485130786895752
|
|
|
|
key: test_mcc
|
|
value: [0.48349378 0.52678658 0.24019223 0.60192927 0.30550961 0.46911585
|
|
0.38731273 0.55166667 0.51089422 0.43226548]
|
|
|
|
mean value: 0.45091664116727936
|
|
|
|
key: train_mcc
|
|
value: [0.68073982 0.68479585 0.68538038 0.66297652 0.64053662 0.63624247
|
|
0.64053662 0.67226438 0.65427913 0.68176621]
|
|
|
|
mean value: 0.6639518009642478
|
|
|
|
key: test_accuracy
|
|
value: [0.74 0.76 0.62 0.8 0.65306122 0.73469388
|
|
0.69387755 0.7755102 0.75510204 0.71428571]
|
|
|
|
mean value: 0.7246530612244898
|
|
|
|
key: train_accuracy
|
|
value: [0.84009009 0.84234234 0.84234234 0.83108108 0.82022472 0.81797753
|
|
0.82022472 0.83595506 0.82696629 0.84044944]
|
|
|
|
mean value: 0.8317653608664844
|
|
|
|
key: test_fscore
|
|
value: [0.72340426 0.73913043 0.62745098 0.80769231 0.66666667 0.74509804
|
|
0.70588235 0.7755102 0.73913043 0.68181818]
|
|
|
|
mean value: 0.7211783857692176
|
|
|
|
key: train_fscore
|
|
value: [0.83678161 0.84090909 0.83870968 0.82678984 0.81818182 0.81464531
|
|
0.81818182 0.83371298 0.82460137 0.83678161]
|
|
|
|
mean value: 0.8289295121141821
|
|
|
|
key: test_precision
|
|
value: [0.77272727 0.80952381 0.61538462 0.77777778 0.65384615 0.73076923
|
|
0.69230769 0.76 0.77272727 0.75 ]
|
|
|
|
mean value: 0.7335063825063826
|
|
|
|
key: train_precision
|
|
value: [0.85446009 0.84862385 0.85849057 0.84834123 0.82568807 0.82790698
|
|
0.82568807 0.84722222 0.83796296 0.85849057]
|
|
|
|
mean value: 0.8432874620129045
|
|
|
|
key: test_recall
|
|
value: [0.68 0.68 0.64 0.84 0.68 0.76
|
|
0.72 0.79166667 0.70833333 0.625 ]
|
|
|
|
mean value: 0.7125
|
|
|
|
key: train_recall
|
|
value: [0.81981982 0.83333333 0.81981982 0.80630631 0.81081081 0.8018018
|
|
0.81081081 0.8206278 0.81165919 0.8161435 ]
|
|
|
|
mean value: 0.8151133195976246
|
|
|
|
key: test_roc_auc
|
|
value: [0.74 0.76 0.62 0.8 0.6525 0.73416667
|
|
0.69333333 0.77583333 0.75416667 0.7125 ]
|
|
|
|
mean value: 0.7242500000000001
|
|
|
|
key: train_roc_auc
|
|
value: [0.84009009 0.84234234 0.84234234 0.83108108 0.82020361 0.81794126
|
|
0.82020361 0.83598958 0.82700077 0.84050418]
|
|
|
|
mean value: 0.8317698864784067
|
|
|
|
key: test_jcc
|
|
value: [0.56666667 0.5862069 0.45714286 0.67741935 0.5 0.59375
|
|
0.54545455 0.63333333 0.5862069 0.51724138]
|
|
|
|
mean value: 0.5663421929849906
|
|
|
|
key: train_jcc
|
|
value: [0.71936759 0.7254902 0.72222222 0.70472441 0.69230769 0.68725869
|
|
0.69230769 0.71484375 0.70155039 0.71936759]
|
|
|
|
mean value: 0.7079440215086056
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01946115 0.01957774 0.01941347 0.01955652 0.01955462 0.0196085
|
|
0.01974106 0.01984978 0.01933622 0.01981211]
|
|
|
|
mean value: 0.019591116905212404
|
|
|
|
key: score_time
|
|
value: [0.01114154 0.0111351 0.0113771 0.01136541 0.01125026 0.01115489
|
|
0.01127148 0.01123953 0.01115251 0.01124001]
|
|
|
|
mean value: 0.011232781410217284
|
|
|
|
key: test_mcc
|
|
value: [0.76244374 0.85096294 0.61806423 0.6 0.7645166 0.81223286
|
|
0.71889189 0.73061343 0.63819901 0.88443328]
|
|
|
|
mean value: 0.7380357984054712
|
|
|
|
key: train_mcc
|
|
value: [0.83371511 0.80161103 0.85448661 0.83048943 0.82592641 0.82016391
|
|
0.81687801 0.83673263 0.81595225 0.82838049]
|
|
|
|
mean value: 0.8264335880768268
|
|
|
|
key: test_accuracy
|
|
value: [0.88 0.92 0.8 0.8 0.87755102 0.89795918
|
|
0.85714286 0.85714286 0.81632653 0.93877551]
|
|
|
|
mean value: 0.8644897959183674
|
|
|
|
key: train_accuracy
|
|
value: [0.91441441 0.89864865 0.92567568 0.91216216 0.91011236 0.90786517
|
|
0.90561798 0.91685393 0.90561798 0.91235955]
|
|
|
|
mean value: 0.9109327867193036
|
|
|
|
key: test_fscore
|
|
value: [0.88461538 0.92592593 0.82142857 0.8 0.88888889 0.90909091
|
|
0.86792453 0.86792453 0.82352941 0.94117647]
|
|
|
|
mean value: 0.8730504618906395
|
|
|
|
key: train_fscore
|
|
value: [0.91880342 0.90364026 0.9287257 0.91719745 0.91489362 0.91220557
|
|
0.9106383 0.92043011 0.9106383 0.91648822]
|
|
|
|
mean value: 0.915366094037861
|
|
|
|
key: test_precision
|
|
value: [0.85185185 0.86206897 0.74193548 0.8 0.82758621 0.83333333
|
|
0.82142857 0.79310345 0.77777778 0.88888889]
|
|
|
|
mean value: 0.8197974527841047
|
|
|
|
key: train_precision
|
|
value: [0.87398374 0.86122449 0.89211618 0.86746988 0.86693548 0.86938776
|
|
0.86290323 0.88429752 0.86639676 0.87704918]
|
|
|
|
mean value: 0.8721764218626092
|
|
|
|
key: test_recall
|
|
value: [0.92 1. 0.92 0.8 0.96 1.
|
|
0.92 0.95833333 0.875 1. ]
|
|
|
|
mean value: 0.9353333333333333
|
|
|
|
key: train_recall
|
|
value: [0.96846847 0.95045045 0.96846847 0.97297297 0.96846847 0.95945946
|
|
0.96396396 0.95964126 0.95964126 0.95964126]
|
|
|
|
mean value: 0.9631176019068396
|
|
|
|
key: test_roc_auc
|
|
value: [0.88 0.92 0.8 0.8 0.87583333 0.89583333
|
|
0.85583333 0.85916667 0.8175 0.94 ]
|
|
|
|
mean value: 0.8644166666666666
|
|
|
|
key: train_roc_auc
|
|
value: [0.91441441 0.89864865 0.92567568 0.91216216 0.9102432 0.90798085
|
|
0.9057488 0.91675756 0.9054963 0.91225306]
|
|
|
|
mean value: 0.9109380681129561
|
|
|
|
key: test_jcc
|
|
value: [0.79310345 0.86206897 0.6969697 0.66666667 0.8 0.83333333
|
|
0.76666667 0.76666667 0.7 0.88888889]
|
|
|
|
mean value: 0.7774364332985023
|
|
|
|
key: train_jcc
|
|
value: [0.84980237 0.82421875 0.86693548 0.84705882 0.84313725 0.83858268
|
|
0.8359375 0.85258964 0.8359375 0.8458498 ]
|
|
|
|
mean value: 0.8440049804815001
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.87729144 1.87471628 1.75148225 1.9547739 1.96574044 1.90230727
|
|
1.83655858 1.8638792 1.79688931 2.38454366]
|
|
|
|
mean value: 1.9208182334899901
|
|
|
|
key: score_time
|
|
value: [0.01276207 0.01338768 0.01353097 0.0139966 0.01354647 0.01372766
|
|
0.01533937 0.01389694 0.01395631 0.01401591]
|
|
|
|
mean value: 0.013815999031066895
|
|
|
|
key: test_mcc
|
|
value: [0.84270097 0.96076892 0.76991885 0.81649658 0.92153718 1.
|
|
0.92153718 0.83666667 0.84757938 0.96 ]
|
|
|
|
mean value: 0.8877205729792697
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92 0.98 0.88 0.9 0.95918367 1.
|
|
0.95918367 0.91836735 0.91836735 0.97959184]
|
|
|
|
mean value: 0.941469387755102
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.91666667 0.98039216 0.88888889 0.88888889 0.95833333 1.
|
|
0.95833333 0.91666667 0.90909091 0.97959184]
|
|
|
|
mean value: 0.9396852680466126
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.95652174 0.96153846 0.82758621 1. 1. 1.
|
|
1. 0.91666667 1. 0.96 ]
|
|
|
|
mean value: 0.9622313074232115
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.88 1. 0.96 0.8 0.92 1.
|
|
0.92 0.91666667 0.83333333 1. ]
|
|
|
|
mean value: 0.923
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92 0.98 0.88 0.9 0.96 1.
|
|
0.96 0.91833333 0.91666667 0.98 ]
|
|
|
|
mean value: 0.9415
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.84615385 0.96153846 0.8 0.8 0.92 1.
|
|
0.92 0.84615385 0.83333333 0.96 ]
|
|
|
|
mean value: 0.8887179487179487
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.68
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02397132 0.01844621 0.01904774 0.02022338 0.01720476 0.02058673
|
|
0.01822042 0.01578116 0.01732349 0.01825786]
|
|
|
|
mean value: 0.018906307220458985
|
|
|
|
key: score_time
|
|
value: [0.01240921 0.00936747 0.00928259 0.00913763 0.00883198 0.00892711
|
|
0.00901484 0.00884962 0.00885725 0.00887036]
|
|
|
|
mean value: 0.009354805946350098
|
|
|
|
key: test_mcc
|
|
value: [0.88640526 1. 0.92295821 0.96076892 1. 0.92153718
|
|
1. 0.96 0.95993456 0.87813144]
|
|
|
|
mean value: 0.9489735568062528
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.94 1. 0.96 0.98 1. 0.95918367
|
|
1. 0.97959184 0.97959184 0.93877551]
|
|
|
|
mean value: 0.9737142857142858
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93617021 1. 0.96153846 0.97959184 1. 0.95833333
|
|
1. 0.97959184 0.9787234 0.93617021]
|
|
|
|
mean value: 0.9730119298128417
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.92592593 1. 1. 1.
|
|
1. 0.96 1. 0.95652174]
|
|
|
|
mean value: 0.9842447665056361
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.88 1. 1. 0.96 1. 0.92
|
|
1. 1. 0.95833333 0.91666667]
|
|
|
|
mean value: 0.9635
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.94 1. 0.96 0.98 1. 0.96
|
|
1. 0.98 0.97916667 0.93833333]
|
|
|
|
mean value: 0.97375
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.88 1. 0.92592593 0.96 1. 0.92
|
|
1. 0.96 0.95833333 0.88 ]
|
|
|
|
mean value: 0.9484259259259259
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.1148417 0.11432505 0.11538887 0.1149714 0.11462855 0.11497211
|
|
0.11392903 0.11417317 0.11374831 0.11403346]
|
|
|
|
mean value: 0.11450116634368897
|
|
|
|
key: score_time
|
|
value: [0.0176661 0.01796746 0.01774216 0.02182293 0.01767945 0.01764822
|
|
0.01750731 0.01759458 0.01750398 0.01764107]
|
|
|
|
mean value: 0.018077325820922852
|
|
|
|
key: test_mcc
|
|
value: [0.88070485 0.88070485 0.64465837 0.84270097 0.91833333 0.95993456
|
|
0.95993456 0.92153718 0.91833333 0.96 ]
|
|
|
|
mean value: 0.888684199816004
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.94 0.94 0.82 0.92 0.95918367 0.97959184
|
|
0.97959184 0.95918367 0.95918367 0.97959184]
|
|
|
|
mean value: 0.9436326530612245
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93877551 0.94117647 0.83018868 0.91666667 0.96 0.98039216
|
|
0.98039216 0.96 0.95833333 0.97959184]
|
|
|
|
mean value: 0.9445516810497784
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.95833333 0.92307692 0.78571429 0.95652174 0.96 0.96153846
|
|
0.96153846 0.92307692 0.95833333 0.96 ]
|
|
|
|
mean value: 0.9348133460742156
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.92 0.96 0.88 0.88 0.96 1.
|
|
1. 1. 0.95833333 1. ]
|
|
|
|
mean value: 0.9558333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.94 0.94 0.82 0.92 0.95916667 0.97916667
|
|
0.97916667 0.96 0.95916667 0.98 ]
|
|
|
|
mean value: 0.9436666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.88461538 0.88888889 0.70967742 0.84615385 0.92307692 0.96153846
|
|
0.96153846 0.92307692 0.92 0.96 ]
|
|
|
|
mean value: 0.8978566308243727
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01025248 0.01000357 0.00998664 0.00994635 0.01008749 0.01015973
|
|
0.0100162 0.00998664 0.01035595 0.01015663]
|
|
|
|
mean value: 0.010095167160034179
|
|
|
|
key: score_time
|
|
value: [0.00865126 0.00862265 0.00873017 0.00881004 0.00864768 0.00865054
|
|
0.00874853 0.00869131 0.00858212 0.00869083]
|
|
|
|
mean value: 0.008682513236999511
|
|
|
|
key: test_mcc
|
|
value: [0.54886043 0.61806423 0.60783067 0.76991885 0.8136762 0.62837569
|
|
0.73061343 0.68145382 0.6750504 0.79666667]
|
|
|
|
mean value: 0.6870510394432654
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.76 0.8 0.8 0.88 0.89795918 0.79591837
|
|
0.85714286 0.83673469 0.83673469 0.89795918]
|
|
|
|
mean value: 0.8362448979591837
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.77272727 0.7826087 0.86956522 0.88888889 0.76190476
|
|
0.84444444 0.81818182 0.82608696 0.89795918]
|
|
|
|
mean value: 0.8176652953671587
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.89473684 0.85714286 0.95238095 1. 0.94117647
|
|
0.95 0.9 0.86363636 0.88 ]
|
|
|
|
mean value: 0.9121426427030142
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.6 0.68 0.72 0.8 0.8 0.64
|
|
0.76 0.75 0.79166667 0.91666667]
|
|
|
|
mean value: 0.7458333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.76 0.8 0.8 0.88 0.9 0.79916667
|
|
0.85916667 0.835 0.83583333 0.89833333]
|
|
|
|
mean value: 0.83675
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.62962963 0.64285714 0.76923077 0.8 0.61538462
|
|
0.73076923 0.69230769 0.7037037 0.81481481]
|
|
|
|
mean value: 0.6954253154253154
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.62627411 1.64739823 1.61766124 1.63053727 1.63497996 1.65494418
|
|
1.71418619 1.72269344 1.74215984 1.74299431]
|
|
|
|
mean value: 1.6733828783035278
|
|
|
|
key: score_time
|
|
value: [0.09133458 0.09024954 0.09035015 0.09040809 0.09026027 0.10087514
|
|
0.09136271 0.09849358 0.09876132 0.0995295 ]
|
|
|
|
mean value: 0.09416248798370361
|
|
|
|
key: test_mcc
|
|
value: [0.96076892 0.96076892 0.84270097 0.96076892 0.96 1.
|
|
0.95993456 0.92153718 0.87813144 0.91833333]
|
|
|
|
mean value: 0.9362944251238476
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98 0.98 0.92 0.98 0.97959184 1.
|
|
0.97959184 0.95918367 0.93877551 0.95918367]
|
|
|
|
mean value: 0.9676326530612245
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 0.98039216 0.92307692 0.97959184 0.97959184 1.
|
|
0.98039216 0.96 0.93617021 0.95833333]
|
|
|
|
mean value: 0.9677140293105786
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.96153846 0.88888889 1. 1. 1.
|
|
0.96153846 0.92307692 0.95652174 0.95833333]
|
|
|
|
mean value: 0.9649897807506503
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 0.96 0.96 0.96 1.
|
|
1. 1. 0.91666667 0.95833333]
|
|
|
|
mean value: 0.9715
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.98 0.92 0.98 0.98 1.
|
|
0.97916667 0.96 0.93833333 0.95916667]
|
|
|
|
mean value: 0.9676666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96 0.96153846 0.85714286 0.96 0.96 1.
|
|
0.96153846 0.92307692 0.88 0.92 ]
|
|
|
|
mean value: 0.9383296703296703
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.93
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.97712541 0.95402932 0.9370153 0.9889307 0.94077992 0.94789362
|
|
0.96213794 0.94727325 0.97255301 0.98577809]
|
|
|
|
mean value: 0.9613516569137573
|
|
|
|
key: score_time
|
|
value: [0.20900202 0.22341323 0.2311697 0.28326535 0.25937295 0.2417872
|
|
0.27976966 0.20503759 0.24860716 0.22998095]
|
|
|
|
mean value: 0.24114058017730713
|
|
|
|
key: test_mcc
|
|
value: [0.96076892 0.96076892 0.84270097 0.80064077 0.95993456 0.92128466
|
|
0.92128466 0.92153718 0.87813144 0.91833333]
|
|
|
|
mean value: 0.9085385425408611
|
|
|
|
key: train_mcc
|
|
value: [0.97332853 0.98214142 0.98657657 0.96895027 0.97338915 0.97338915
|
|
0.98218183 0.97338596 0.97777378 0.9821804 ]
|
|
|
|
mean value: 0.977329705024023
|
|
|
|
key: test_accuracy
|
|
value: [0.98 0.98 0.92 0.9 0.97959184 0.95918367
|
|
0.95918367 0.95918367 0.93877551 0.95918367]
|
|
|
|
mean value: 0.9535102040816327
|
|
|
|
key: train_accuracy
|
|
value: [0.98648649 0.99099099 0.99324324 0.98423423 0.98651685 0.98651685
|
|
0.99101124 0.98651685 0.98876404 0.99101124]
|
|
|
|
mean value: 0.9885292033606641
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 0.98039216 0.92307692 0.90196078 0.98039216 0.96153846
|
|
0.96153846 0.96 0.93617021 0.95833333]
|
|
|
|
mean value: 0.9542994327027047
|
|
|
|
key: train_fscore
|
|
value: [0.98666667 0.99107143 0.99328859 0.98447894 0.98666667 0.98666667
|
|
0.99107143 0.98672566 0.98891353 0.99111111]
|
|
|
|
mean value: 0.9886660683772148
|
|
|
|
key: test_precision
|
|
value: [1. 0.96153846 0.88888889 0.88461538 0.96153846 0.92592593
|
|
0.92592593 0.92307692 0.95652174 0.95833333]
|
|
|
|
mean value: 0.938636504397374
|
|
|
|
key: train_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.97368421 0.98230088 0.98666667 0.96943231 0.97368421 0.97368421
|
|
0.98230088 0.97379913 0.97807018 0.98237885]
|
|
|
|
mean value: 0.9776001539269301
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 0.96 0.92 1. 1.
|
|
1. 1. 0.91666667 0.95833333]
|
|
|
|
mean value: 0.9715
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.98 0.92 0.9 0.97916667 0.95833333
|
|
0.95833333 0.96 0.93833333 0.95916667]
|
|
|
|
mean value: 0.9533333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.98648649 0.99099099 0.99324324 0.98423423 0.98654709 0.98654709
|
|
0.99103139 0.98648649 0.98873874 0.99099099]
|
|
|
|
mean value: 0.9885296731709288
|
|
|
|
key: test_jcc
|
|
value: [0.96 0.96153846 0.85714286 0.82142857 0.96153846 0.92592593
|
|
0.92592593 0.92307692 0.88 0.92 ]
|
|
|
|
mean value: 0.9136577126577127
|
|
|
|
key: train_jcc
|
|
value: [0.97368421 0.98230088 0.98666667 0.96943231 0.97368421 0.97368421
|
|
0.98230088 0.97379913 0.97807018 0.98237885]
|
|
|
|
mean value: 0.9776001539269301
|
|
|
|
MCC on Blind test: 0.93
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02502823 0.01015925 0.01042533 0.01120496 0.01012278 0.01015449
|
|
0.01012135 0.01007318 0.01005602 0.01013899]
|
|
|
|
mean value: 0.011748456954956054
|
|
|
|
key: score_time
|
|
value: [0.01176095 0.00904202 0.00916862 0.00930667 0.0087595 0.00871396
|
|
0.00890446 0.00904727 0.00883031 0.00882578]
|
|
|
|
mean value: 0.009235954284667969
|
|
|
|
key: test_mcc
|
|
value: [0.60192927 0.40032038 0.52 0.56044854 0.65813164 0.59839104
|
|
0.51252158 0.59297231 0.63272208 0.6750504 ]
|
|
|
|
mean value: 0.5752487245999452
|
|
|
|
key: train_mcc
|
|
value: [0.63963964 0.66233017 0.67117798 0.65768435 0.62845024 0.63596332
|
|
0.64046624 0.64048076 0.63166622 0.66292294]
|
|
|
|
mean value: 0.6470781844201794
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.7 0.76 0.78 0.81632653 0.79591837
|
|
0.75510204 0.79591837 0.81632653 0.83673469]
|
|
|
|
mean value: 0.7856326530612245
|
|
|
|
key: train_accuracy
|
|
value: [0.81981982 0.83108108 0.83558559 0.82882883 0.81348315 0.81797753
|
|
0.82022472 0.82022472 0.81573034 0.83146067]
|
|
|
|
mean value: 0.8234416438910821
|
|
|
|
key: test_fscore
|
|
value: [0.79166667 0.70588235 0.76 0.7755102 0.84210526 0.81481481
|
|
0.75 0.7826087 0.80851064 0.82608696]
|
|
|
|
mean value: 0.7857185592133971
|
|
|
|
key: train_fscore
|
|
value: [0.81981982 0.83296214 0.83521445 0.8280543 0.81917211 0.81797753
|
|
0.81900452 0.81981982 0.81858407 0.83221477]
|
|
|
|
mean value: 0.824282352548306
|
|
|
|
key: test_precision
|
|
value: [0.82608696 0.69230769 0.76 0.79166667 0.75 0.75862069
|
|
0.7826087 0.81818182 0.82608696 0.86363636]
|
|
|
|
mean value: 0.7869195839143366
|
|
|
|
key: train_precision
|
|
value: [0.81981982 0.82378855 0.83710407 0.83181818 0.79324895 0.8161435
|
|
0.82272727 0.82352941 0.80786026 0.83035714]
|
|
|
|
mean value: 0.820639715255508
|
|
|
|
key: test_recall
|
|
value: [0.76 0.72 0.76 0.76 0.96 0.88
|
|
0.72 0.75 0.79166667 0.79166667]
|
|
|
|
mean value: 0.7893333333333333
|
|
|
|
key: train_recall
|
|
value: [0.81981982 0.84234234 0.83333333 0.82432432 0.84684685 0.81981982
|
|
0.81531532 0.8161435 0.82959641 0.83408072]
|
|
|
|
mean value: 0.8281622429604493
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.7 0.76 0.78 0.81333333 0.79416667
|
|
0.75583333 0.795 0.81583333 0.83583333]
|
|
|
|
mean value: 0.7849999999999999
|
|
|
|
key: train_roc_auc
|
|
value: [0.81981982 0.83108108 0.83558559 0.82882883 0.81355795 0.81798166
|
|
0.82021371 0.82023391 0.81569911 0.83145477]
|
|
|
|
mean value: 0.8234456429523694
|
|
|
|
key: test_jcc
|
|
value: [0.65517241 0.54545455 0.61290323 0.63333333 0.72727273 0.6875
|
|
0.6 0.64285714 0.67857143 0.7037037 ]
|
|
|
|
mean value: 0.6486768520792436
|
|
|
|
key: train_jcc
|
|
value: [0.69465649 0.71374046 0.71705426 0.70656371 0.69372694 0.69201521
|
|
0.69348659 0.69465649 0.6928839 0.71264368]
|
|
|
|
mean value: 0.7011427714969269
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.07708216 0.06709576 0.06800151 0.07237768 0.07314706 0.07462096
|
|
0.06670213 0.07845426 0.07025003 0.07046866]
|
|
|
|
mean value: 0.07182002067565918
|
|
|
|
key: score_time
|
|
value: [0.01100016 0.01058912 0.01108718 0.01138806 0.01171041 0.01121306
|
|
0.01112843 0.01115584 0.01063395 0.011307 ]
|
|
|
|
mean value: 0.011121320724487304
|
|
|
|
key: test_mcc
|
|
value: [0.96076892 1. 0.92295821 0.96076892 1. 0.95993456
|
|
0.96 0.96 0.95993456 0.87813144]
|
|
|
|
mean value: 0.9562496612854547
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98 1. 0.96 0.98 1. 0.97959184
|
|
0.97959184 0.97959184 0.97959184 0.93877551]
|
|
|
|
mean value: 0.9777142857142858
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 1. 0.96153846 0.97959184 1. 0.98039216
|
|
0.97959184 0.97959184 0.9787234 0.93617021]
|
|
|
|
mean value: 0.9775191582361258
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.92592593 1. 1. 0.96153846
|
|
1. 0.96 1. 0.95652174]
|
|
|
|
mean value: 0.9803986126594822
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 1. 0.96 1. 1.
|
|
0.96 1. 0.95833333 0.91666667]
|
|
|
|
mean value: 0.9755
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 1. 0.96 0.98 1. 0.97916667
|
|
0.98 0.98 0.97916667 0.93833333]
|
|
|
|
mean value: 0.9776666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96 1. 0.92592593 0.96 1. 0.96153846
|
|
0.96 0.96 0.95833333 0.88 ]
|
|
|
|
mean value: 0.9565797720797721
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.93
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04628181 0.05825949 0.05627584 0.04663467 0.07689404 0.0415566
|
|
0.08275175 0.0420351 0.07374287 0.08435965]
|
|
|
|
mean value: 0.060879182815551755
|
|
|
|
key: score_time
|
|
value: [0.01931405 0.01220775 0.01228547 0.01941514 0.01230836 0.01246762
|
|
0.01223207 0.01222324 0.01220465 0.02117896]
|
|
|
|
mean value: 0.014583730697631836
|
|
|
|
key: test_mcc
|
|
value: [0.92295821 1. 0.84 0.71774056 0.83973406 0.715
|
|
0.83666667 0.87813144 0.92128466 0.91833333]
|
|
|
|
mean value: 0.8589848932618054
|
|
|
|
key: train_mcc
|
|
value: [0.97748739 0.97748739 0.97748739 0.97748739 0.97319193 0.96854927
|
|
0.96408444 0.97753762 0.97303357 0.97303357]
|
|
|
|
mean value: 0.9739379972893354
|
|
|
|
key: test_accuracy
|
|
value: [0.96 1. 0.92 0.84 0.91836735 0.85714286
|
|
0.91836735 0.93877551 0.95918367 0.95918367]
|
|
|
|
mean value: 0.9271020408163265
|
|
|
|
key: train_accuracy
|
|
value: [0.98873874 0.98873874 0.98873874 0.98873874 0.98651685 0.98426966
|
|
0.98202247 0.98876404 0.98651685 0.98651685]
|
|
|
|
mean value: 0.9869561696527989
|
|
|
|
key: test_fscore
|
|
value: [0.95833333 1. 0.92 0.80952381 0.91666667 0.85714286
|
|
0.92 0.93617021 0.95652174 0.95833333]
|
|
|
|
mean value: 0.9232691951896392
|
|
|
|
key: train_fscore
|
|
value: [0.98876404 0.98876404 0.98871332 0.98876404 0.98660714 0.98426966
|
|
0.98206278 0.98881432 0.98654709 0.98654709]
|
|
|
|
mean value: 0.98698535272404
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.92 1. 0.95652174 0.875
|
|
0.92 0.95652174 1. 0.95833333]
|
|
|
|
mean value: 0.9586376811594203
|
|
|
|
key: train_precision
|
|
value: [0.98654709 0.98654709 0.99095023 0.98654709 0.97787611 0.98206278
|
|
0.97767857 0.98660714 0.98654709 0.98654709]
|
|
|
|
mean value: 0.9847910253002775
|
|
|
|
key: test_recall
|
|
value: [0.92 1. 0.92 0.68 0.88 0.84
|
|
0.92 0.91666667 0.91666667 0.95833333]
|
|
|
|
mean value: 0.8951666666666667
|
|
|
|
key: train_recall
|
|
value: [0.99099099 0.99099099 0.98648649 0.99099099 0.9954955 0.98648649
|
|
0.98648649 0.99103139 0.98654709 0.98654709]
|
|
|
|
mean value: 0.9892053488466045
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 1. 0.92 0.84 0.91916667 0.8575
|
|
0.91833333 0.93833333 0.95833333 0.95916667]
|
|
|
|
mean value: 0.9270833333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.98873874 0.98873874 0.98873874 0.98873874 0.98653699 0.98427463
|
|
0.98203248 0.98875894 0.98651679 0.98651679]
|
|
|
|
mean value: 0.986959156465883
|
|
|
|
key: test_jcc
|
|
value: [0.92 1. 0.85185185 0.68 0.84615385 0.75
|
|
0.85185185 0.88 0.91666667 0.92 ]
|
|
|
|
mean value: 0.8616524216524216
|
|
|
|
key: train_jcc
|
|
value: [0.97777778 0.97777778 0.97767857 0.97777778 0.97356828 0.96902655
|
|
0.96475771 0.97787611 0.97345133 0.97345133]
|
|
|
|
mean value: 0.9743143205685845
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02518463 0.01006675 0.00982952 0.00961113 0.00949383 0.009547
|
|
0.00988173 0.00991678 0.00980306 0.00975633]
|
|
|
|
mean value: 0.011309075355529784
|
|
|
|
key: score_time
|
|
value: [0.00912738 0.00898457 0.00877213 0.00848055 0.00863981 0.00878549
|
|
0.00868893 0.00860167 0.00891256 0.0086875 ]
|
|
|
|
mean value: 0.008768057823181153
|
|
|
|
key: test_mcc
|
|
value: [0.64051262 0.52678658 0.64465837 0.60192927 0.57236448 0.6446564
|
|
0.79632832 0.63333333 0.46911585 0.67333333]
|
|
|
|
mean value: 0.6203018552226107
|
|
|
|
key: train_mcc
|
|
value: [0.64604111 0.60047136 0.70842288 0.70882709 0.62621338 0.63946187
|
|
0.68642861 0.72641101 0.66582162 0.60599784]
|
|
|
|
mean value: 0.6614096763244641
|
|
|
|
key: test_accuracy
|
|
value: [0.82 0.76 0.82 0.8 0.7755102 0.81632653
|
|
0.89795918 0.81632653 0.73469388 0.83673469]
|
|
|
|
mean value: 0.8077551020408164
|
|
|
|
key: train_accuracy
|
|
value: [0.82207207 0.79954955 0.8536036 0.8536036 0.81123596 0.81797753
|
|
0.84269663 0.86292135 0.83146067 0.80224719]
|
|
|
|
mean value: 0.8297368154671526
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.77777778 0.83018868 0.79166667 0.80701754 0.83636364
|
|
0.90196078 0.81632653 0.72340426 0.83333333]
|
|
|
|
mean value: 0.8141568619256172
|
|
|
|
key: train_fscore
|
|
value: [0.82863341 0.80610022 0.85776805 0.8583878 0.82051282 0.82655246
|
|
0.84649123 0.86593407 0.83940043 0.80952381]
|
|
|
|
mean value: 0.8359304290418681
|
|
|
|
key: test_precision
|
|
value: [0.80769231 0.72413793 0.78571429 0.82608696 0.71875 0.76666667
|
|
0.88461538 0.8 0.73913043 0.83333333]
|
|
|
|
mean value: 0.7886127300360809
|
|
|
|
key: train_precision
|
|
value: [0.79916318 0.78059072 0.83404255 0.83122363 0.7804878 0.7877551
|
|
0.82478632 0.84913793 0.80327869 0.78242678]
|
|
|
|
mean value: 0.807289270860631
|
|
|
|
key: test_recall
|
|
value: [0.84 0.84 0.88 0.76 0.92 0.92
|
|
0.92 0.83333333 0.70833333 0.83333333]
|
|
|
|
mean value: 0.8455
|
|
|
|
key: train_recall
|
|
value: [0.86036036 0.83333333 0.88288288 0.88738739 0.86486486 0.86936937
|
|
0.86936937 0.88340807 0.87892377 0.83856502]
|
|
|
|
mean value: 0.8668464428554115
|
|
|
|
key: test_roc_auc
|
|
value: [0.82 0.76 0.82 0.8 0.7725 0.81416667
|
|
0.8975 0.81666667 0.73416667 0.83666667]
|
|
|
|
mean value: 0.8071666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [0.82207207 0.79954955 0.8536036 0.8536036 0.8113562 0.81809276
|
|
0.84275643 0.86287521 0.83135378 0.80216539]
|
|
|
|
mean value: 0.8297428594513796
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.63636364 0.70967742 0.65517241 0.67647059 0.71875
|
|
0.82142857 0.68965517 0.56666667 0.71428571]
|
|
|
|
mean value: 0.6888470182541618
|
|
|
|
key: train_jcc
|
|
value: [0.70740741 0.67518248 0.75095785 0.7519084 0.69565217 0.70437956
|
|
0.7338403 0.76356589 0.72324723 0.68 ]
|
|
|
|
mean value: 0.7186141304596468
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01906443 0.0277431 0.02435446 0.02984214 0.02284908 0.02623963
|
|
0.03047895 0.02620649 0.02377176 0.01955438]
|
|
|
|
mean value: 0.025010442733764647
|
|
|
|
key: score_time
|
|
value: [0.0101335 0.01798868 0.01181626 0.01181197 0.01211691 0.01187253
|
|
0.01185322 0.01176381 0.01180267 0.01184225]
|
|
|
|
mean value: 0.01230018138885498
|
|
|
|
key: test_mcc
|
|
value: [0.88640526 0.92 0.84270097 0.68599434 0.92153718 0.95993456
|
|
0.88443328 0.75793094 0.87833333 0.92153718]
|
|
|
|
mean value: 0.8658807037125982
|
|
|
|
key: train_mcc
|
|
value: [0.96412048 0.97756674 0.9687142 0.79520623 0.96404476 0.97303357
|
|
0.97318977 0.96439334 0.96002279 0.95509545]
|
|
|
|
mean value: 0.9495387318884156
|
|
|
|
key: test_accuracy
|
|
value: [0.94 0.96 0.92 0.82 0.95918367 0.97959184
|
|
0.93877551 0.87755102 0.93877551 0.95918367]
|
|
|
|
mean value: 0.9293061224489796
|
|
|
|
key: train_accuracy
|
|
value: [0.98198198 0.98873874 0.98423423 0.88738739 0.98202247 0.98651685
|
|
0.98651685 0.98202247 0.97977528 0.97752809]
|
|
|
|
mean value: 0.9736724364814252
|
|
|
|
key: test_fscore
|
|
value: [0.93617021 0.96 0.92307692 0.7804878 0.95833333 0.98039216
|
|
0.93617021 0.88 0.93877551 0.96 ]
|
|
|
|
mean value: 0.9253406153887047
|
|
|
|
key: train_fscore
|
|
value: [0.98181818 0.98866213 0.98405467 0.87309645 0.98198198 0.98648649
|
|
0.98636364 0.98230088 0.98013245 0.97747748]
|
|
|
|
mean value: 0.9722374347338296
|
|
|
|
key: test_precision
|
|
value: [1. 0.96 0.88888889 1. 1. 0.96153846
|
|
1. 0.84615385 0.92 0.92307692]
|
|
|
|
mean value: 0.949965811965812
|
|
|
|
key: train_precision
|
|
value: [0.99082569 0.99543379 0.99539171 1. 0.98198198 0.98648649
|
|
0.99541284 0.96943231 0.96521739 0.98190045]
|
|
|
|
mean value: 0.9862082653805538
|
|
|
|
key: test_recall
|
|
value: [0.88 0.96 0.96 0.64 0.92 1.
|
|
0.88 0.91666667 0.95833333 1. ]
|
|
|
|
mean value: 0.9115
|
|
|
|
key: train_recall
|
|
value: [0.97297297 0.98198198 0.97297297 0.77477477 0.98198198 0.98648649
|
|
0.97747748 0.9955157 0.9955157 0.97309417]
|
|
|
|
mean value: 0.9612774209186765
|
|
|
|
key: test_roc_auc
|
|
value: [0.94 0.96 0.92 0.82 0.96 0.97916667
|
|
0.94 0.87833333 0.93916667 0.96 ]
|
|
|
|
mean value: 0.9296666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [0.98198198 0.98873874 0.98423423 0.88738739 0.98202238 0.98651679
|
|
0.98649659 0.98199208 0.97973983 0.97753808]
|
|
|
|
mean value: 0.973664808306064
|
|
|
|
key: test_jcc
|
|
value: [0.88 0.92307692 0.85714286 0.64 0.92 0.96153846
|
|
0.88 0.78571429 0.88461538 0.92307692]
|
|
|
|
mean value: 0.8655164835164835
|
|
|
|
key: train_jcc
|
|
value: [0.96428571 0.97757848 0.96860987 0.77477477 0.96460177 0.97333333
|
|
0.97309417 0.96521739 0.96103896 0.95594714]
|
|
|
|
mean value: 0.9478481592423275
|
|
|
|
MCC on Blind test: 0.89
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01888323 0.0200851 0.02039981 0.02149796 0.02013373 0.01909614
|
|
0.01854086 0.01984286 0.02030134 0.02071524]
|
|
|
|
mean value: 0.019949626922607423
|
|
|
|
key: score_time
|
|
value: [0.01195025 0.01184821 0.01173782 0.01182914 0.01186275 0.01190805
|
|
0.01179409 0.01210928 0.0120101 0.01210546]
|
|
|
|
mean value: 0.01191551685333252
|
|
|
|
key: test_mcc
|
|
value: [0.78288136 0.96076892 0.84270097 0.78288136 0.91833333 0.77771377
|
|
0.77771377 0.6446564 0.77771377 0.91833333]
|
|
|
|
mean value: 0.8183696994652161
|
|
|
|
key: train_mcc
|
|
value: [0.91598627 0.95092329 0.96855691 0.9345682 0.93418826 0.9257121
|
|
0.84944449 0.95540608 0.89578084 0.98202238]
|
|
|
|
mean value: 0.9312588821662183
|
|
|
|
key: test_accuracy
|
|
value: [0.88 0.98 0.92 0.88 0.95918367 0.87755102
|
|
0.87755102 0.81632653 0.87755102 0.95918367]
|
|
|
|
mean value: 0.902734693877551
|
|
|
|
key: train_accuracy
|
|
value: [0.95720721 0.97522523 0.98423423 0.96621622 0.96629213 0.96179775
|
|
0.91910112 0.97752809 0.94606742 0.99101124]
|
|
|
|
mean value: 0.9644680635691871
|
|
|
|
key: test_fscore
|
|
value: [0.86363636 0.98039216 0.92307692 0.86363636 0.96 0.89285714
|
|
0.89285714 0.79069767 0.85714286 0.95833333]
|
|
|
|
mean value: 0.8982629957821476
|
|
|
|
key: train_fscore
|
|
value: [0.95591647 0.97560976 0.98412698 0.96503497 0.96717724 0.96296296
|
|
0.925 0.97727273 0.94366197 0.99103139]
|
|
|
|
mean value: 0.9647794473666983
|
|
|
|
key: test_precision
|
|
value: [1. 0.96153846 0.88888889 1. 0.96 0.80645161
|
|
0.80645161 0.89473684 1. 0.95833333]
|
|
|
|
mean value: 0.9276400751672398
|
|
|
|
key: train_precision
|
|
value: [0.98564593 0.96069869 0.99086758 1. 0.94042553 0.93248945
|
|
0.86046512 0.99078341 0.99014778 0.99103139]
|
|
|
|
mean value: 0.9642554886074128
|
|
|
|
key: test_recall
|
|
value: [0.76 1. 0.96 0.76 0.96 1.
|
|
1. 0.70833333 0.75 0.95833333]
|
|
|
|
mean value: 0.8856666666666667
|
|
|
|
key: train_recall
|
|
value: [0.92792793 0.99099099 0.97747748 0.93243243 0.9954955 0.9954955
|
|
1. 0.96412556 0.90134529 0.99103139]
|
|
|
|
mean value: 0.9676322061972287
|
|
|
|
key: test_roc_auc
|
|
value: [0.88 0.98 0.92 0.88 0.95916667 0.875
|
|
0.875 0.81416667 0.875 0.95916667]
|
|
|
|
mean value: 0.90175
|
|
|
|
key: train_roc_auc
|
|
value: [0.95720721 0.97522523 0.98423423 0.96621622 0.96635761 0.96187331
|
|
0.91928251 0.97755828 0.94616814 0.99101119]
|
|
|
|
mean value: 0.9645133923160829
|
|
|
|
key: test_jcc
|
|
value: [0.76 0.96153846 0.85714286 0.76 0.92307692 0.80645161
|
|
0.80645161 0.65384615 0.75 0.92 ]
|
|
|
|
mean value: 0.8198507621410848
|
|
|
|
key: train_jcc
|
|
value: [0.91555556 0.95238095 0.96875 0.93243243 0.93644068 0.92857143
|
|
0.86046512 0.95555556 0.89333333 0.98222222]
|
|
|
|
mean value: 0.9325707274296652
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.17979002 0.16478252 0.16317677 0.1616292 0.16364074 0.16254401
|
|
0.16340542 0.16373444 0.16396976 0.16625404]
|
|
|
|
mean value: 0.16529269218444825
|
|
|
|
key: score_time
|
|
value: [0.01511502 0.01513171 0.01514769 0.01489496 0.01536655 0.01533961
|
|
0.0151093 0.01518273 0.01568508 0.01588202]
|
|
|
|
mean value: 0.015285468101501465
|
|
|
|
key: test_mcc
|
|
value: [0.96076892 0.96076892 0.88640526 1. 0.96 0.95993456
|
|
1. 0.91833333 0.95993456 0.87813144]
|
|
|
|
mean value: 0.9484276999624902
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98 0.98 0.94 1. 0.97959184 0.97959184
|
|
1. 0.95918367 0.97959184 0.93877551]
|
|
|
|
mean value: 0.9736734693877551
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 0.97959184 0.94339623 1. 0.97959184 0.98039216
|
|
1. 0.95833333 0.9787234 0.93617021]
|
|
|
|
mean value: 0.9735790843836531
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.89285714 1. 1. 0.96153846
|
|
1. 0.95833333 1. 0.95652174]
|
|
|
|
mean value: 0.9769250676859372
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96 0.96 1. 1. 0.96 1.
|
|
1. 0.95833333 0.95833333 0.91666667]
|
|
|
|
mean value: 0.9713333333333334
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.98 0.94 1. 0.98 0.97916667
|
|
1. 0.95916667 0.97916667 0.93833333]
|
|
|
|
mean value: 0.9735833333333334
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96 0.96 0.89285714 1. 0.96 0.96153846
|
|
1. 0.92 0.95833333 0.88 ]
|
|
|
|
mean value: 0.9492728937728938
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05750608 0.06154442 0.07337785 0.05855465 0.06095672 0.05984592
|
|
0.05733228 0.06260085 0.08742285 0.05867982]
|
|
|
|
mean value: 0.06378214359283448
|
|
|
|
key: score_time
|
|
value: [0.02558541 0.02478409 0.02458358 0.02156138 0.03384137 0.02470827
|
|
0.02753186 0.03048611 0.02806687 0.02013803]
|
|
|
|
mean value: 0.026128697395324706
|
|
|
|
key: test_mcc
|
|
value: [0.96076892 1. 0.92295821 0.96076892 0.88443328 1.
|
|
0.96 0.96 0.92128466 0.83920658]
|
|
|
|
mean value: 0.9409420569809102
|
|
|
|
key: train_mcc
|
|
value: [0.99099099 0.98649649 0.9955056 0.98649649 0.99101119 0.9955157
|
|
0.98652661 0.98218183 0.99105141 0.99105141]
|
|
|
|
mean value: 0.989682771886615
|
|
|
|
key: test_accuracy
|
|
value: [0.98 1. 0.96 0.98 0.93877551 1.
|
|
0.97959184 0.97959184 0.95918367 0.91836735]
|
|
|
|
mean value: 0.9695510204081632
|
|
|
|
key: train_accuracy
|
|
value: [0.9954955 0.99324324 0.99774775 0.99324324 0.99550562 0.99775281
|
|
0.99325843 0.99101124 0.99550562 0.99550562]
|
|
|
|
mean value: 0.9948269055572426
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 1. 0.96153846 0.97959184 0.93617021 1.
|
|
0.97959184 0.97959184 0.95652174 0.91304348]
|
|
|
|
mean value: 0.9685641238634499
|
|
|
|
key: train_fscore
|
|
value: [0.9954955 0.99322799 0.99774266 0.99322799 0.9954955 0.99775281
|
|
0.99322799 0.99095023 0.9954955 0.9954955 ]
|
|
|
|
mean value: 0.9948111653783939
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.92592593 1. 1. 1.
|
|
1. 0.96 1. 0.95454545]
|
|
|
|
mean value: 0.9840471380471381
|
|
|
|
key: train_precision
|
|
value: [0.9954955 0.99547511 1. 0.99547511 0.9954955 0.9955157
|
|
0.99547511 1. 1. 1. ]
|
|
|
|
mean value: 0.9972932025424771
|
|
|
|
key: test_recall
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[0.96 1. 1. 0.96 0.88 1.
|
|
0.96 1. 0.91666667 0.875 ]
|
|
|
|
mean value: 0.9551666666666666
|
|
|
|
key: train_recall
|
|
value: [0.9954955 0.99099099 0.9954955 0.99099099 0.9954955 1.
|
|
0.99099099 0.98206278 0.99103139 0.99103139]
|
|
|
|
mean value: 0.9923585019997576
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 1. 0.96 0.98 0.94 1.
|
|
0.98 0.98 0.95833333 0.9175 ]
|
|
|
|
mean value: 0.9695833333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.9954955 0.99324324 0.99774775 0.99324324 0.9955056 0.99775785
|
|
0.99325334 0.99103139 0.9955157 0.9955157 ]
|
|
|
|
mean value: 0.9948309295842929
|
|
|
|
key: test_jcc
|
|
value: [0.96 1. 0.92592593 0.96 0.88 1.
|
|
0.96 0.96 0.91666667 0.84 ]
|
|
|
|
mean value: 0.9402592592592592
|
|
|
|
key: train_jcc
|
|
value: [0.99103139 0.98654709 0.9954955 0.98654709 0.99103139 0.9955157
|
|
0.98654709 0.98206278 0.99103139 0.99103139]
|
|
|
|
mean value: 0.9896840786975316
|
|
|
|
MCC on Blind test: 0.87
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.14369655 0.16230178 0.1307528 0.08277488 0.10019708 0.15200377
|
|
0.16148877 0.17856431 0.12056184 0.1690824 ]
|
|
|
|
mean value: 0.14014241695404053
|
|
|
|
key: score_time
|
|
value: [0.02457857 0.02421474 0.01468205 0.01463795 0.02461195 0.02440977
|
|
0.02177668 0.02866268 0.01503062 0.02985239]
|
|
|
|
mean value: 0.02224574089050293
|
|
|
|
key: test_mcc
|
|
value: [0.64465837 0.60783067 0.56044854 0.64051262 0.715 0.75712849
|
|
0.80235519 0.79666667 0.68145382 0.87813144]
|
|
|
|
mean value: 0.7084185803827924
|
|
|
|
key: train_mcc
|
|
value: [0.99099099 0.98649649 0.99103121 0.98649649 0.98652689 0.99101119
|
|
0.9955157 0.9820617 0.97761752 0.98652689]
|
|
|
|
mean value: 0.9874275061447364
|
|
|
|
key: test_accuracy
|
|
value: [0.82 0.8 0.78 0.82 0.85714286 0.87755102
|
|
0.89795918 0.89795918 0.83673469 0.93877551]
|
|
|
|
mean value: 0.8526122448979592
|
|
|
|
key: train_accuracy
|
|
value: [0.9954955 0.99324324 0.9954955 0.99324324 0.99325843 0.99550562
|
|
0.99775281 0.99101124 0.98876404 0.99325843]
|
|
|
|
mean value: 0.9937028039275231
|
|
|
|
key: test_fscore
|
|
value: [0.80851064 0.7826087 0.78431373 0.81632653 0.85714286 0.88461538
|
|
0.89361702 0.89795918 0.81818182 0.93617021]
|
|
|
|
mean value: 0.847944606770857
|
|
|
|
key: train_fscore
|
|
value: [0.9954955 0.99322799 0.99547511 0.99325843 0.99325843 0.9954955
|
|
0.99775281 0.99107143 0.98871332 0.99325843]
|
|
|
|
mean value: 0.9937006931827311
|
|
|
|
key: test_precision
|
|
value: [0.86363636 0.85714286 0.76923077 0.83333333 0.875 0.85185185
|
|
0.95454545 0.88 0.9 0.95652174]
|
|
|
|
mean value: 0.8741262368871064
|
|
|
|
key: train_precision
|
|
value: [0.9954955 0.99547511 1. 0.99103139 0.99103139 0.9954955
|
|
0.9955157 0.98666667 0.99545455 0.9954955 ]
|
|
|
|
mean value: 0.9941661287066194
|
|
|
|
key: test_recall
|
|
value: [0.76 0.72 0.8 0.8 0.84 0.92
|
|
0.84 0.91666667 0.75 0.91666667]
|
|
|
|
mean value: 0.8263333333333334
|
|
|
|
key: train_recall
|
|
value: [0.9954955 0.99099099 0.99099099 0.9954955 0.9954955 0.9954955
|
|
1. 0.9955157 0.98206278 0.99103139]
|
|
|
|
mean value: 0.9932573829434816
|
|
|
|
key: test_roc_auc
|
|
value: [0.82 0.8 0.78 0.82 0.8575 0.87666667
|
|
0.89916667 0.89833333 0.835 0.93833333]
|
|
|
|
mean value: 0.8525
|
|
|
|
key: train_roc_auc
|
|
value: [0.9954955 0.99324324 0.9954955 0.99324324 0.99326344 0.9955056
|
|
0.99775785 0.99100109 0.98877914 0.99326344]
|
|
|
|
mean value: 0.9937048034581667
|
|
|
|
key: test_jcc
|
|
value: [0.67857143 0.64285714 0.64516129 0.68965517 0.75 0.79310345
|
|
0.80769231 0.81481481 0.69230769 0.88 ]
|
|
|
|
mean value: 0.7394163297255623
|
|
|
|
key: train_jcc
|
|
value: [0.99103139 0.98654709 0.99099099 0.98660714 0.98660714 0.99103139
|
|
0.9955157 0.98230088 0.97767857 0.98660714]
|
|
|
|
mean value: 0.987491743648486
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.63235569 0.63317466 0.63071823 0.62365651 0.62968159 0.62717772
|
|
0.62779069 0.63667822 0.6356523 0.63522553]
|
|
|
|
mean value: 0.6312111139297485
|
|
|
|
key: score_time
|
|
value: [0.01033831 0.00926232 0.00931191 0.00936294 0.00961137 0.00936007
|
|
0.00931549 0.00933266 0.00941968 0.00937819]
|
|
|
|
mean value: 0.009469294548034668
|
|
|
|
key: test_mcc
|
|
value: [0.88640526 1. 0.92295821 0.96076892 0.88443328 1.
|
|
0.96 0.96 1. 0.96 ]
|
|
|
|
mean value: 0.9534565667677445
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.94 1. 0.96 0.98 0.93877551 1.
|
|
0.97959184 0.97959184 1. 0.97959184]
|
|
|
|
mean value: 0.9757551020408163
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93617021 1. 0.96153846 0.97959184 0.93617021 1.
|
|
0.97959184 0.97959184 1. 0.97959184]
|
|
|
|
mean value: 0.9752246234009152
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.92592593 1. 1. 1.
|
|
1. 0.96 1. 0.96 ]
|
|
|
|
mean value: 0.9845925925925926
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.88 1. 1. 0.96 0.88 1. 0.96 1. 1. 1. ]
|
|
|
|
mean value: 0.968
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.94 1. 0.96 0.98 0.94 1. 0.98 0.98 1. 0.98]
|
|
|
|
mean value: 0.976
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.88 1. 0.92592593 0.96 0.88 1.
|
|
0.96 0.96 1. 0.96 ]
|
|
|
|
mean value: 0.9525925925925925
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.88
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02870512 0.0294354 0.03010893 0.02970076 0.02953982 0.02973127
|
|
0.03004742 0.02984238 0.03031874 0.03292584]
|
|
|
|
mean value: 0.030035567283630372
|
|
|
|
key: score_time
|
|
value: [0.01188898 0.01486874 0.01477027 0.01470542 0.01482105 0.01474309
|
|
0.0147965 0.01471829 0.01461315 0.01277113]
|
|
|
|
mean value: 0.014269661903381348
|
|
|
|
key: test_mcc
|
|
value: [0.84 0.65319726 0.52167203 0.72524067 0.87813144 0.60834499
|
|
0.75793094 0.69595532 0.76603235 0.73061343]
|
|
|
|
mean value: 0.7177118423650555
|
|
|
|
key: train_mcc
|
|
value: [0.93880559 0.89306822 0.93244189 0.97332853 0.96033278 0.94640809
|
|
0.93893048 0.9418135 0.92434482 0.95080412]
|
|
|
|
mean value: 0.9400278020076293
|
|
|
|
key: test_accuracy
|
|
value: [0.92 0.82 0.76 0.86 0.93877551 0.79591837
|
|
0.87755102 0.83673469 0.87755102 0.85714286]
|
|
|
|
mean value: 0.8543673469387755
|
|
|
|
key: train_accuracy
|
|
value: [0.96846847 0.94369369 0.96621622 0.98648649 0.97977528 0.97303371
|
|
0.96853933 0.97078652 0.96179775 0.9752809 ]
|
|
|
|
mean value: 0.9694078348010933
|
|
|
|
key: test_fscore
|
|
value: [0.92 0.8 0.76923077 0.86792453 0.94117647 0.82142857
|
|
0.875 0.85185185 0.88461538 0.86792453]
|
|
|
|
mean value: 0.8599152104318586
|
|
|
|
key: train_fscore
|
|
value: [0.96744186 0.94033413 0.96629213 0.98630137 0.97931034 0.97260274
|
|
0.96744186 0.97052154 0.96263736 0.97505669]
|
|
|
|
mean value: 0.9687940032986482
|
|
|
|
key: test_precision
|
|
value: [0.92 0.9 0.74074074 0.82142857 0.92307692 0.74193548
|
|
0.91304348 0.76666667 0.82142857 0.79310345]
|
|
|
|
mean value: 0.8341423883749173
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.96412556 1. 1. 0.98611111
|
|
1. 0.98165138 0.94396552 0.98623853]
|
|
|
|
mean value: 0.9862092097147488
|
|
|
|
key: test_recall
|
|
value: [0.92 0.72 0.8 0.92 0.96 0.92
|
|
0.84 0.95833333 0.95833333 0.95833333]
|
|
|
|
mean value: 0.8955
|
|
|
|
key: train_recall
|
|
value: [0.93693694 0.88738739 0.96846847 0.97297297 0.95945946 0.95945946
|
|
0.93693694 0.95964126 0.98206278 0.96412556]
|
|
|
|
mean value: 0.9527451218034177
|
|
|
|
key: test_roc_auc
|
|
value: [0.92 0.82 0.76 0.86 0.93833333 0.79333333
|
|
0.87833333 0.83916667 0.87916667 0.85916667]
|
|
|
|
mean value: 0.85475
|
|
|
|
key: train_roc_auc
|
|
value: [0.96846847 0.94369369 0.96621622 0.98648649 0.97972973 0.97300327
|
|
0.96846847 0.97081162 0.96175211 0.97530602]
|
|
|
|
mean value: 0.9693936088554923
|
|
|
|
key: test_jcc
|
|
value: [0.85185185 0.66666667 0.625 0.76666667 0.88888889 0.6969697
|
|
0.77777778 0.74193548 0.79310345 0.76666667]
|
|
|
|
mean value: 0.7575527147635045
|
|
|
|
key: train_jcc
|
|
value: [0.93693694 0.88738739 0.93478261 0.97297297 0.95945946 0.94666667
|
|
0.93693694 0.94273128 0.9279661 0.95132743]
|
|
|
|
mean value: 0.9397167781912286
|
|
|
|
MCC on Blind test: -0.03
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02890158 0.03465581 0.09217739 0.04436755 0.02121997 0.03819394
|
|
0.03851271 0.03891659 0.03898382 0.03866243]
|
|
|
|
mean value: 0.041459178924560545
|
|
|
|
key: score_time
|
|
value: [0.01893306 0.01491523 0.014009 0.01667905 0.0229218 0.02178884
|
|
0.02281857 0.02161932 0.0234673 0.02316618]
|
|
|
|
mean value: 0.020031833648681642
|
|
|
|
key: test_mcc
|
|
value: [0.92 1. 0.84270097 0.80064077 0.91833333 0.92128466
|
|
0.87813144 0.87833333 0.91833333 0.96 ]
|
|
|
|
mean value: 0.9037757845345274
|
|
|
|
key: train_mcc
|
|
value: [0.96412048 0.95499371 0.97301246 0.95954708 0.95080412 0.96420203
|
|
0.95979475 0.96862627 0.95540071 0.96862627]
|
|
|
|
mean value: 0.9619127879040015
|
|
|
|
key: test_accuracy
|
|
value: [0.96 1. 0.92 0.9 0.95918367 0.95918367
|
|
0.93877551 0.93877551 0.95918367 0.97959184]
|
|
|
|
mean value: 0.951469387755102
|
|
|
|
key: train_accuracy
|
|
value: [0.98198198 0.97747748 0.98648649 0.97972973 0.9752809 0.98202247
|
|
0.97977528 0.98426966 0.97752809 0.98426966]
|
|
|
|
mean value: 0.9808821743091406
|
|
|
|
key: test_fscore
|
|
value: [0.96 1. 0.92307692 0.89795918 0.96 0.96153846
|
|
0.94117647 0.93877551 0.95833333 0.97959184]
|
|
|
|
mean value: 0.9520451719149198
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_8020.py:148: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_8020.py:151: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.98214286 0.97757848 0.98654709 0.97986577 0.97550111 0.98214286
|
|
0.97995546 0.9844098 0.97787611 0.9844098 ]
|
|
|
|
mean value: 0.9810429322095635
|
|
|
|
key: test_precision
|
|
value: [0.96 1. 0.88888889 0.91666667 0.96 0.92592593
|
|
0.92307692 0.92 0.95833333 0.96 ]
|
|
|
|
mean value: 0.9412891737891738
|
|
|
|
key: train_precision
|
|
value: [0.97345133 0.97321429 0.98214286 0.97333333 0.96475771 0.97345133
|
|
0.969163 0.97787611 0.9650655 0.97787611]
|
|
|
|
mean value: 0.9730331550476334
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 0.96 0.88 0.96 1.
|
|
0.96 0.95833333 0.95833333 1. ]
|
|
|
|
mean value: 0.9636666666666667
|
|
|
|
key: train_recall
|
|
value: [0.99099099 0.98198198 0.99099099 0.98648649 0.98648649 0.99099099
|
|
0.99099099 0.99103139 0.99103139 0.99103139]
|
|
|
|
mean value: 0.9892013089322507
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 1. 0.92 0.9 0.95916667 0.95833333
|
|
0.93833333 0.93916667 0.95916667 0.98 ]
|
|
|
|
mean value: 0.9514166666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.98198198 0.97747748 0.98648649 0.97972973 0.97530602 0.98204258
|
|
0.97980043 0.98425443 0.97749768 0.98425443]
|
|
|
|
mean value: 0.9808831252777441
|
|
|
|
key: test_jcc
|
|
value: [0.92307692 1. 0.85714286 0.81481481 0.92307692 0.92592593
|
|
0.88888889 0.88461538 0.92 0.96 ]
|
|
|
|
mean value: 0.9097541717541717
|
|
|
|
key: train_jcc
|
|
value: [0.96491228 0.95614035 0.97345133 0.96052632 0.95217391 0.96491228
|
|
0.96069869 0.96929825 0.95670996 0.96929825]
|
|
|
|
mean value: 0.9628121606441641
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.27218747 0.26127696 0.18785405 0.28764963 0.40172148 0.40487075
|
|
0.26473737 0.18109083 0.34061122 0.30895209]
|
|
|
|
mean value: 0.2910951852798462
|
|
|
|
key: score_time
|
|
value: [0.01880717 0.01218629 0.01881289 0.01893997 0.0226512 0.01253533
|
|
0.02504444 0.02335548 0.0281117 0.02712703]
|
|
|
|
mean value: 0.020757150650024415
|
|
|
|
key: test_mcc
|
|
value: [0.88070485 1. 0.84270097 0.80064077 0.91833333 0.92128466
|
|
0.87813144 0.87833333 0.91833333 0.96 ]
|
|
|
|
mean value: 0.8998462691273253
|
|
|
|
key: train_mcc
|
|
value: [0.97301246 0.95499371 0.97301246 0.95954708 0.95080412 0.96420203
|
|
0.95979475 0.96862627 0.95540071 0.96862627]
|
|
|
|
mean value: 0.962801986172177
|
|
|
|
key: test_accuracy
|
|
value: [0.94 1. 0.92 0.9 0.95918367 0.95918367
|
|
0.93877551 0.93877551 0.95918367 0.97959184]
|
|
|
|
mean value: 0.949469387755102
|
|
|
|
key: train_accuracy
|
|
value: [0.98648649 0.97747748 0.98648649 0.97972973 0.9752809 0.98202247
|
|
0.97977528 0.98426966 0.97752809 0.98426966]
|
|
|
|
mean value: 0.981332624759591
|
|
|
|
key: test_fscore
|
|
value: [0.93877551 1. 0.92307692 0.89795918 0.96 0.96153846
|
|
0.94117647 0.93877551 0.95833333 0.97959184]
|
|
|
|
mean value: 0.949922722935328
|
|
|
|
key: train_fscore
|
|
value: [0.98654709 0.97757848 0.98654709 0.97986577 0.97550111 0.98214286
|
|
0.97995546 0.9844098 0.97787611 0.9844098 ]
|
|
|
|
mean value: 0.9814833550154571
|
|
|
|
key: test_precision
|
|
value: [0.95833333 1. 0.88888889 0.91666667 0.96 0.92592593
|
|
0.92307692 0.92 0.95833333 0.96 ]
|
|
|
|
mean value: 0.9411225071225071
|
|
|
|
key: train_precision
|
|
value: [0.98214286 0.97321429 0.98214286 0.97333333 0.96475771 0.97345133
|
|
0.969163 0.97787611 0.9650655 0.97787611]
|
|
|
|
mean value: 0.9739023080185563
|
|
|
|
key: test_recall
|
|
value: [0.92 1. 0.96 0.88 0.96 1.
|
|
0.96 0.95833333 0.95833333 1. ]
|
|
|
|
mean value: 0.9596666666666667
|
|
|
|
key: train_recall
|
|
value: [0.99099099 0.98198198 0.99099099 0.98648649 0.98648649 0.99099099
|
|
0.99099099 0.99103139 0.99103139 0.99103139]
|
|
|
|
mean value: 0.9892013089322507
|
|
|
|
key: test_roc_auc
|
|
value: [0.94 1. 0.92 0.9 0.95916667 0.95833333
|
|
0.93833333 0.93916667 0.95916667 0.98 ]
|
|
|
|
mean value: 0.9494166666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.98648649 0.97747748 0.98648649 0.97972973 0.97530602 0.98204258
|
|
0.97980043 0.98425443 0.97749768 0.98425443]
|
|
|
|
mean value: 0.9813335757281946
|
|
|
|
key: test_jcc
|
|
value: [0.88461538 1. 0.85714286 0.81481481 0.92307692 0.92592593
|
|
0.88888889 0.88461538 0.92 0.96 ]
|
|
|
|
mean value: 0.9059080179080179
|
|
|
|
key: train_jcc
|
|
value: [0.97345133 0.95614035 0.97345133 0.96052632 0.95217391 0.96491228
|
|
0.96069869 0.96929825 0.95670996 0.96929825]
|
|
|
|
mean value: 0.9636660653173514
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03413534 0.03227377 0.0284586 0.0329082 0.03873944 0.07343173
|
|
0.05770206 0.06540132 0.05557275 0.04642391]
|
|
|
|
mean value: 0.04650471210479736
|
|
|
|
key: score_time
|
|
value: [0.01263881 0.01264358 0.01274562 0.01293254 0.01469231 0.01307297
|
|
0.01352239 0.01246619 0.01854992 0.01346874]
|
|
|
|
mean value: 0.01367330551147461
|
|
|
|
key: test_mcc
|
|
value: [0.9258201 0.70064905 0.85163063 0.78446454 0.84983659 0.62103443
|
|
0.76130617 0.71400555 0.62103443 0.44702443]
|
|
|
|
mean value: 0.7276805902251664
|
|
|
|
key: train_mcc
|
|
value: [0.88582334 0.91279206 0.87717553 0.92158297 0.89646935 0.89549921
|
|
0.88011758 0.87887248 0.92161308 0.9039779 ]
|
|
|
|
mean value: 0.8973923495801944
|
|
|
|
key: test_accuracy
|
|
value: [0.96153846 0.84615385 0.92 0.88 0.92 0.8
|
|
0.88 0.84 0.8 0.72 ]
|
|
|
|
mean value: 0.8567692307692307
|
|
|
|
key: train_accuracy
|
|
value: [0.94247788 0.95575221 0.93832599 0.96035242 0.94713656 0.94713656
|
|
0.93832599 0.93832599 0.96035242 0.95154185]
|
|
|
|
mean value: 0.9479727885852404
|
|
|
|
key: test_fscore
|
|
value: [0.96296296 0.83333333 0.92307692 0.88888889 0.90909091 0.81481481
|
|
0.88888889 0.86666667 0.7826087 0.75862069]
|
|
|
|
mean value: 0.8628952773030734
|
|
|
|
key: train_fscore
|
|
value: [0.94372294 0.95689655 0.93965517 0.96137339 0.94915254 0.94871795
|
|
0.94067797 0.94017094 0.96103896 0.95238095]
|
|
|
|
mean value: 0.9493787369202193
|
|
|
|
key: test_precision
|
|
value: [0.92857143 0.90909091 0.85714286 0.8 1. 0.73333333
|
|
0.85714286 0.76470588 0.9 0.6875 ]
|
|
|
|
mean value: 0.8437487267634326
|
|
|
|
key: train_precision
|
|
value: [0.92372881 0.93277311 0.92372881 0.94117647 0.91803279 0.925
|
|
0.90243902 0.90909091 0.94067797 0.93220339]
|
|
|
|
mean value: 0.9248851283249179
|
|
|
|
key: test_recall
|
|
value: [1. 0.76923077 1. 1. 0.83333333 0.91666667
|
|
0.92307692 1. 0.69230769 0.84615385]
|
|
|
|
mean value: 0.8980769230769231
|
|
|
|
key: train_recall
|
|
value: [0.96460177 0.98230088 0.95614035 0.98245614 0.98245614 0.97368421
|
|
0.98230088 0.97345133 0.98230088 0.97345133]
|
|
|
|
mean value: 0.975314392175128
|
|
|
|
key: test_roc_auc
|
|
value: [0.96153846 0.84615385 0.92307692 0.88461538 0.91666667 0.80448718
|
|
0.87820513 0.83333333 0.80448718 0.71474359]
|
|
|
|
mean value: 0.8567307692307693
|
|
|
|
key: train_roc_auc
|
|
value: [0.94247788 0.95575221 0.93824717 0.96025462 0.94698028 0.9470191
|
|
0.93851886 0.93848005 0.96044869 0.95163794]
|
|
|
|
mean value: 0.9479816798633752
|
|
|
|
key: test_jcc
|
|
value: [0.92857143 0.71428571 0.85714286 0.8 0.83333333 0.6875
|
|
0.8 0.76470588 0.64285714 0.61111111]
|
|
|
|
mean value: 0.7639507469654528
|
|
|
|
key: train_jcc
|
|
value: [0.89344262 0.91735537 0.88617886 0.92561983 0.90322581 0.90243902
|
|
0.888 0.88709677 0.925 0.90909091]
|
|
|
|
mean value: 0.9037449205477323
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.79028153 0.78926802 0.79229522 0.71584368 0.77593255 0.81414747
|
|
0.68222499 0.66712141 0.76080084 0.69142079]
|
|
|
|
mean value: 0.7479336500167847
|
|
|
|
key: score_time
|
|
value: [0.01258707 0.02184248 0.01340437 0.01317191 0.01403666 0.01361775
|
|
0.0136764 0.01403832 0.01372528 0.01370764]
|
|
|
|
mean value: 0.014380788803100586
|
|
|
|
key: test_mcc
|
|
value: [1. 0.77151675 0.85163063 0.69033695 0.84983659 0.92307692
|
|
0.6025641 0.92307692 0.85163063 0.6025641 ]
|
|
|
|
mean value: 0.806623358904532
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.99122739 1. 0.99122739 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9982454784329841
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.88461538 0.92 0.84 0.92 0.96
|
|
0.8 0.96 0.92 0.8 ]
|
|
|
|
mean value: 0.9004615384615384
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.99559471 1. 0.99559471 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9991189427312775
|
|
|
|
key: test_fscore
|
|
value: [1. 0.88 0.92307692 0.84615385 0.90909091 0.96
|
|
0.8 0.96 0.91666667 0.8 ]
|
|
|
|
mean value: 0.8994988344988345
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.99563319 1. 0.99563319 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9991266375545852
|
|
|
|
key: test_precision
|
|
value: [1. 0.91666667 0.85714286 0.78571429 1. 0.92307692
|
|
0.83333333 1. 1. 0.83333333]
|
|
|
|
mean value: 0.9149267399267399
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.99130435 1. 0.99130435 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9982608695652174
|
|
|
|
key: test_recall
|
|
value: [1. 0.84615385 1. 0.91666667 0.83333333 1.
|
|
0.76923077 0.92307692 0.84615385 0.76923077]
|
|
|
|
mean value: 0.8903846153846154
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.88461538 0.92307692 0.84294872 0.91666667 0.96153846
|
|
0.80128205 0.96153846 0.92307692 0.80128205]
|
|
|
|
mean value: 0.9016025641025641
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.99557522 1. 0.99557522 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9991150442477876
|
|
|
|
key: test_jcc
|
|
value: [1. 0.78571429 0.85714286 0.73333333 0.83333333 0.92307692
|
|
0.66666667 0.92307692 0.84615385 0.66666667]
|
|
|
|
mean value: 0.8235164835164835
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.99130435 1. 0.99130435 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9982608695652174
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01308942 0.01135349 0.00957036 0.00921202 0.00923777 0.00910544
|
|
0.00998735 0.00916696 0.00929189 0.00958514]
|
|
|
|
mean value: 0.009959983825683593
|
|
|
|
key: score_time
|
|
value: [0.01556945 0.00919104 0.00885892 0.00881696 0.00934935 0.00868511
|
|
0.00944114 0.00888324 0.00882626 0.00912142]
|
|
|
|
mean value: 0.009674286842346192
|
|
|
|
key: test_mcc
|
|
value: [0.70064905 0.46291005 0.65908204 0.62103443 0.71400555 0.52904327
|
|
0.44230769 0.39743046 0.5423696 0.44702443]
|
|
|
|
mean value: 0.5515856562828161
|
|
|
|
key: train_mcc
|
|
value: [0.6592444 0.66633112 0.6770533 0.65236608 0.67978244 0.69476577
|
|
0.65639314 0.65138192 0.55765703 0.68560054]
|
|
|
|
mean value: 0.6580575735712032
|
|
|
|
key: test_accuracy
|
|
value: [0.84615385 0.73076923 0.8 0.8 0.84 0.76
|
|
0.72 0.68 0.76 0.72 ]
|
|
|
|
mean value: 0.7656923076923077
|
|
|
|
key: train_accuracy
|
|
value: [0.82743363 0.83185841 0.83700441 0.82378855 0.83700441 0.84581498
|
|
0.82819383 0.82378855 0.76651982 0.84140969]
|
|
|
|
mean value: 0.826281626447312
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.74074074 0.82758621 0.81481481 0.8 0.76923077
|
|
0.72 0.75 0.8 0.75862069]
|
|
|
|
mean value: 0.7838136078480906
|
|
|
|
key: train_fscore
|
|
value: [0.83682008 0.83898305 0.84518828 0.83471074 0.84773663 0.85355649
|
|
0.82666667 0.83193277 0.7953668 0.84745763]
|
|
|
|
mean value: 0.835841913598135
|
|
|
|
key: test_precision
|
|
value: [0.8 0.71428571 0.70588235 0.73333333 1. 0.71428571
|
|
0.75 0.63157895 0.70588235 0.6875 ]
|
|
|
|
mean value: 0.7442748415155536
|
|
|
|
key: train_precision
|
|
value: [0.79365079 0.80487805 0.808 0.7890625 0.79844961 0.816
|
|
0.83035714 0.792 0.70547945 0.81300813]
|
|
|
|
mean value: 0.7950885679827621
|
|
|
|
key: test_recall
|
|
value: [0.92307692 0.76923077 1. 0.91666667 0.66666667 0.83333333
|
|
0.69230769 0.92307692 0.92307692 0.84615385]
|
|
|
|
mean value: 0.8493589743589743
|
|
|
|
key: train_recall
|
|
value: [0.88495575 0.87610619 0.88596491 0.88596491 0.90350877 0.89473684
|
|
0.82300885 0.87610619 0.91150442 0.88495575]
|
|
|
|
mean value: 0.8826812606738084
|
|
|
|
key: test_roc_auc
|
|
value: [0.84615385 0.73076923 0.80769231 0.80448718 0.83333333 0.76282051
|
|
0.72115385 0.66987179 0.75320513 0.71474359]
|
|
|
|
mean value: 0.7644230769230769
|
|
|
|
key: train_roc_auc
|
|
value: [0.82743363 0.83185841 0.83678777 0.82351343 0.83671014 0.84559851
|
|
0.82817109 0.82401801 0.76715572 0.84160068]
|
|
|
|
mean value: 0.8262847383946592
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.58823529 0.70588235 0.6875 0.66666667 0.625
|
|
0.5625 0.6 0.66666667 0.61111111]
|
|
|
|
mean value: 0.6463562091503268
|
|
|
|
key: train_jcc
|
|
value: [0.71942446 0.72262774 0.73188406 0.71631206 0.73571429 0.74452555
|
|
0.70454545 0.71223022 0.66025641 0.73529412]
|
|
|
|
mean value: 0.7182814343802338
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01043892 0.00924373 0.01011038 0.01104879 0.00947833 0.01047659
|
|
0.00920486 0.00944543 0.0094595 0.00935364]
|
|
|
|
mean value: 0.009826016426086426
|
|
|
|
key: score_time
|
|
value: [0.0091598 0.00879455 0.00882435 0.00961089 0.00867343 0.00944972
|
|
0.00876999 0.00920916 0.00893283 0.00882697]
|
|
|
|
mean value: 0.009025168418884278
|
|
|
|
key: test_mcc
|
|
value: [0.6172134 0.46291005 0.67948718 0.78446454 0.5423696 0.35897436
|
|
0.76282051 0.37073365 0.6025641 0.61382459]
|
|
|
|
mean value: 0.5795361981703262
|
|
|
|
key: train_mcc
|
|
value: [0.73636096 0.72912657 0.73844914 0.72075193 0.71535838 0.71845889
|
|
0.7237303 0.76240393 0.72715275 0.7459772 ]
|
|
|
|
mean value: 0.7317770046503951
|
|
|
|
key: test_accuracy
|
|
value: [0.80769231 0.73076923 0.84 0.88 0.76 0.68
|
|
0.88 0.68 0.8 0.8 ]
|
|
|
|
mean value: 0.7858461538461539
|
|
|
|
key: train_accuracy
|
|
value: [0.86725664 0.86283186 0.86784141 0.85903084 0.85462555 0.85903084
|
|
0.85903084 0.88105727 0.86343612 0.8722467 ]
|
|
|
|
mean value: 0.8646388055046587
|
|
|
|
key: test_fscore
|
|
value: [0.81481481 0.74074074 0.83333333 0.88888889 0.7 0.66666667
|
|
0.88 0.73333333 0.8 0.82758621]
|
|
|
|
mean value: 0.788536398467433
|
|
|
|
key: train_fscore
|
|
value: [0.87179487 0.86919831 0.87394958 0.86554622 0.86419753 0.86206897
|
|
0.86666667 0.88209607 0.86462882 0.87553648]
|
|
|
|
mean value: 0.8695683516914982
|
|
|
|
key: test_precision
|
|
value: [0.78571429 0.71428571 0.83333333 0.8 0.875 0.66666667
|
|
0.91666667 0.64705882 0.83333333 0.75 ]
|
|
|
|
mean value: 0.7822058823529412
|
|
|
|
key: train_precision
|
|
value: [0.84297521 0.83064516 0.83870968 0.83064516 0.81395349 0.84745763
|
|
0.81889764 0.87068966 0.85344828 0.85 ]
|
|
|
|
mean value: 0.8397421890932065
|
|
|
|
key: test_recall
|
|
value: [0.84615385 0.76923077 0.83333333 1. 0.58333333 0.66666667
|
|
0.84615385 0.84615385 0.76923077 0.92307692]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_recall
|
|
value: [0.90265487 0.91150442 0.9122807 0.90350877 0.92105263 0.87719298
|
|
0.92035398 0.89380531 0.87610619 0.90265487]
|
|
|
|
mean value: 0.9021114733736997
|
|
|
|
key: test_roc_auc
|
|
value: [0.80769231 0.73076923 0.83974359 0.88461538 0.75320513 0.67948718
|
|
0.88141026 0.67307692 0.80128205 0.79487179]
|
|
|
|
mean value: 0.7846153846153846
|
|
|
|
key: train_roc_auc
|
|
value: [0.86725664 0.86283186 0.86764478 0.85883403 0.85433163 0.85895047
|
|
0.8592998 0.88111318 0.86349169 0.87238007]
|
|
|
|
mean value: 0.8646134140661388
|
|
|
|
key: test_jcc
|
|
value: [0.6875 0.58823529 0.71428571 0.8 0.53846154 0.5
|
|
0.78571429 0.57894737 0.66666667 0.70588235]
|
|
|
|
mean value: 0.6565693220608081
|
|
|
|
key: train_jcc
|
|
value: [0.77272727 0.76865672 0.7761194 0.76296296 0.76086957 0.75757576
|
|
0.76470588 0.7890625 0.76153846 0.77862595]
|
|
|
|
mean value: 0.7692844475976246
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00923204 0.00892687 0.00996375 0.01031876 0.00995374 0.01004934
|
|
0.00994253 0.01012826 0.0100534 0.00993919]
|
|
|
|
mean value: 0.009850788116455077
|
|
|
|
key: score_time
|
|
value: [0.01516294 0.01499009 0.01087356 0.01082897 0.01076412 0.01109385
|
|
0.01093602 0.01061201 0.01087832 0.01079845]
|
|
|
|
mean value: 0.011693835258483887
|
|
|
|
key: test_mcc
|
|
value: [0.38924947 0.56591646 0.37073365 0.60001249 0.52297636 0.83974359
|
|
0.51923077 0.61382459 0.43871881 0.35954625]
|
|
|
|
mean value: 0.5219952432099637
|
|
|
|
key: train_mcc
|
|
value: [0.64611891 0.70087369 0.70952716 0.63007531 0.66526906 0.66532102
|
|
0.63902479 0.62166825 0.66526906 0.66555334]
|
|
|
|
mean value: 0.6608700579491201
|
|
|
|
key: test_accuracy
|
|
value: [0.69230769 0.76923077 0.68 0.8 0.76 0.92
|
|
0.76 0.8 0.72 0.68 ]
|
|
|
|
mean value: 0.7581538461538462
|
|
|
|
key: train_accuracy
|
|
value: [0.82300885 0.84955752 0.85462555 0.81497797 0.83259912 0.83259912
|
|
0.81938326 0.81057269 0.83259912 0.83259912]
|
|
|
|
mean value: 0.830252231881798
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.8 0.6 0.7826087 0.72727273 0.91666667
|
|
0.76923077 0.82758621 0.74074074 0.71428571]
|
|
|
|
mean value: 0.754505818741201
|
|
|
|
key: train_fscore
|
|
value: [0.82142857 0.85470085 0.85333333 0.81415929 0.83478261 0.83185841
|
|
0.8209607 0.80542986 0.83035714 0.82882883]
|
|
|
|
mean value: 0.8295839601902778
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.70588235 0.75 0.81818182 0.8 0.91666667
|
|
0.76923077 0.75 0.71428571 0.66666667]
|
|
|
|
mean value: 0.7618186715245538
|
|
|
|
key: train_precision
|
|
value: [0.82882883 0.82644628 0.86486486 0.82142857 0.82758621 0.83928571
|
|
0.81034483 0.82407407 0.83783784 0.8440367 ]
|
|
|
|
mean value: 0.8324733904042092
|
|
|
|
key: test_recall
|
|
value: [0.61538462 0.92307692 0.5 0.75 0.66666667 0.91666667
|
|
0.76923077 0.92307692 0.76923077 0.76923077]
|
|
|
|
mean value: 0.7602564102564102
|
|
|
|
key: train_recall
|
|
value: [0.81415929 0.88495575 0.84210526 0.80701754 0.84210526 0.8245614
|
|
0.83185841 0.78761062 0.82300885 0.81415929]
|
|
|
|
mean value: 0.8271541686073591
|
|
|
|
key: test_roc_auc
|
|
value: [0.69230769 0.76923077 0.67307692 0.79807692 0.75641026 0.91987179
|
|
0.75961538 0.79487179 0.71794872 0.67628205]
|
|
|
|
mean value: 0.7557692307692307
|
|
|
|
key: train_roc_auc
|
|
value: [0.82300885 0.84955752 0.85468095 0.8150132 0.83255706 0.83263468
|
|
0.81943798 0.81047198 0.83255706 0.83251824]
|
|
|
|
mean value: 0.8302437509703462
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.66666667 0.42857143 0.64285714 0.57142857 0.84615385
|
|
0.625 0.70588235 0.58823529 0.55555556]
|
|
|
|
mean value: 0.6130350858292035
|
|
|
|
key: train_jcc
|
|
value: [0.6969697 0.74626866 0.74418605 0.68656716 0.71641791 0.71212121
|
|
0.6962963 0.67424242 0.70992366 0.70769231]
|
|
|
|
mean value: 0.7090685379298987
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01528049 0.01310015 0.01299286 0.01418447 0.01399589 0.01312518
|
|
0.01324058 0.013273 0.01394391 0.01383018]
|
|
|
|
mean value: 0.013696670532226562
|
|
|
|
key: score_time
|
|
value: [0.01167631 0.01034451 0.00977945 0.00984764 0.00952077 0.0103097
|
|
0.01044273 0.00946236 0.01037502 0.00957084]
|
|
|
|
mean value: 0.010132932662963867
|
|
|
|
key: test_mcc
|
|
value: [0.79056942 0.38924947 0.76282051 0.72057669 0.68640647 0.65908204
|
|
0.76130617 0.44702443 0.83974359 0.44702443]
|
|
|
|
mean value: 0.6503803223982324
|
|
|
|
key: train_mcc
|
|
value: [0.79000853 0.83138508 0.87053585 0.79851456 0.79327113 0.81418412
|
|
0.81246681 0.84355265 0.84796142 0.86243422]
|
|
|
|
mean value: 0.8264314354378517
|
|
|
|
key: test_accuracy
|
|
value: [0.88461538 0.69230769 0.88 0.84 0.84 0.8
|
|
0.88 0.72 0.92 0.72 ]
|
|
|
|
mean value: 0.8176923076923077
|
|
|
|
key: train_accuracy
|
|
value: [0.88938053 0.91150442 0.9339207 0.89427313 0.88986784 0.9030837
|
|
0.9030837 0.92070485 0.92070485 0.92951542]
|
|
|
|
mean value: 0.9096039140774239
|
|
|
|
key: test_fscore
|
|
value: [0.89655172 0.71428571 0.88 0.85714286 0.81818182 0.82758621
|
|
0.88888889 0.75862069 0.92307692 0.75862069]
|
|
|
|
mean value: 0.8322955511921029
|
|
|
|
key: train_fscore
|
|
value: [0.89795918 0.91735537 0.93670886 0.90243902 0.89959839 0.90983607
|
|
0.90833333 0.92307692 0.925 0.93220339]
|
|
|
|
mean value: 0.9152510546112866
|
|
|
|
key: test_precision
|
|
value: [0.8125 0.66666667 0.84615385 0.75 0.9 0.70588235
|
|
0.85714286 0.6875 0.92307692 0.6875 ]
|
|
|
|
mean value: 0.7836422645981469
|
|
|
|
key: train_precision
|
|
value: [0.83333333 0.86046512 0.90243902 0.84090909 0.82962963 0.85384615
|
|
0.85826772 0.89256198 0.87401575 0.89430894]
|
|
|
|
mean value: 0.8639776739514956
|
|
|
|
key: test_recall
|
|
value: [1. 0.76923077 0.91666667 1. 0.75 1.
|
|
0.92307692 0.84615385 0.92307692 0.84615385]
|
|
|
|
mean value: 0.8974358974358975
|
|
|
|
key: train_recall
|
|
value: [0.97345133 0.98230088 0.97368421 0.97368421 0.98245614 0.97368421
|
|
0.96460177 0.95575221 0.98230088 0.97345133]
|
|
|
|
mean value: 0.9735367179009471
|
|
|
|
key: test_roc_auc
|
|
value: [0.88461538 0.69230769 0.88141026 0.84615385 0.83653846 0.80769231
|
|
0.87820513 0.71474359 0.91987179 0.71474359]
|
|
|
|
mean value: 0.8176282051282051
|
|
|
|
key: train_roc_auc
|
|
value: [0.88938053 0.91150442 0.93374476 0.89392175 0.88945816 0.90277131
|
|
0.90335352 0.92085856 0.920975 0.92970812]
|
|
|
|
mean value: 0.909567613724577
|
|
|
|
key: test_jcc
|
|
value: [0.8125 0.55555556 0.78571429 0.75 0.69230769 0.70588235
|
|
0.8 0.61111111 0.85714286 0.61111111]
|
|
|
|
mean value: 0.718132496588379
|
|
|
|
key: train_jcc
|
|
value: [0.81481481 0.84732824 0.88095238 0.82222222 0.81751825 0.83458647
|
|
0.83206107 0.85714286 0.86046512 0.87301587]
|
|
|
|
mean value: 0.8440107291744913
|
|
|
|
MCC on Blind test: 0.81
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.19634104 1.88567209 0.91407704 0.63246727 1.39020109 1.56026959
|
|
1.35955167 1.42231727 1.0939703 1.27300692]
|
|
|
|
mean value: 1.2727874279022218
|
|
|
|
key: score_time
|
|
value: [0.01739836 0.016572 0.01230597 0.01282024 0.01462388 0.01456165
|
|
0.01275301 0.0154891 0.01344919 0.01518488]
|
|
|
|
mean value: 0.014515829086303712
|
|
|
|
key: test_mcc
|
|
value: [0.77151675 0.70064905 0.85163063 0.85163063 0.84983659 0.78446454
|
|
0.76282051 0.61382459 0.69033695 0.68640647]
|
|
|
|
mean value: 0.75631167034517
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.96475703 0.92965295 0.99122739 1.
|
|
1. 1. 0.99122807 1. ]
|
|
|
|
mean value: 0.9876865436231997
|
|
|
|
key: test_accuracy
|
|
value: [0.88461538 0.84615385 0.92 0.92 0.92 0.88
|
|
0.88 0.8 0.84 0.84 ]
|
|
|
|
mean value: 0.8730769230769231
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.98237885 0.96475771 0.99559471 1.
|
|
1. 1. 0.99559471 1. ]
|
|
|
|
mean value: 0.9938325991189427
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.83333333 0.92307692 0.92307692 0.90909091 0.88888889
|
|
0.88 0.82758621 0.83333333 0.85714286]
|
|
|
|
mean value: 0.8764418263728608
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.98245614 0.96521739 0.99563319 1.
|
|
1. 1. 0.99559471 1. ]
|
|
|
|
mean value: 0.9938901433084538
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.90909091 0.85714286 0.85714286 1. 0.8
|
|
0.91666667 0.75 0.90909091 0.8 ]
|
|
|
|
mean value: 0.8656277056277056
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.98245614 0.95689655 0.99130435 1.
|
|
1. 1. 0.99122807 1. ]
|
|
|
|
mean value: 0.992188511007654
|
|
|
|
key: test_recall
|
|
value: [0.92307692 0.76923077 1. 1. 0.83333333 1.
|
|
0.84615385 0.92307692 0.76923077 0.92307692]
|
|
|
|
mean value: 0.8987179487179487
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.98245614 0.97368421 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9956140350877193
|
|
|
|
key: test_roc_auc
|
|
value: [0.88461538 0.84615385 0.92307692 0.92307692 0.91666667 0.88461538
|
|
0.88141026 0.79487179 0.84294872 0.83653846]
|
|
|
|
mean value: 0.8733974358974359
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.98237851 0.96471821 0.99557522 1.
|
|
1. 1. 0.99561404 1. ]
|
|
|
|
mean value: 0.993828598043782
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.71428571 0.85714286 0.85714286 0.83333333 0.8
|
|
0.78571429 0.70588235 0.71428571 0.75 ]
|
|
|
|
mean value: 0.7817787114845939
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.96551724 0.93277311 0.99130435 1.
|
|
1. 1. 0.99122807 1. ]
|
|
|
|
mean value: 0.9880822768624533
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01784992 0.01510668 0.01419759 0.01308346 0.01286006 0.01507568
|
|
0.01366568 0.01394176 0.01240349 0.01515055]
|
|
|
|
mean value: 0.014333486557006836
|
|
|
|
key: score_time
|
|
value: [0.01202488 0.00984907 0.0098002 0.0087719 0.00910997 0.00981688
|
|
0.00906801 0.00890541 0.0089016 0.00896502]
|
|
|
|
mean value: 0.009521293640136718
|
|
|
|
key: test_mcc
|
|
value: [1. 0.79056942 0.85163063 0.76282051 1. 0.76282051
|
|
0.85163063 0.85163063 0.92259985 0.67948718]
|
|
|
|
mean value: 0.847318934811858
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.88461538 0.92 0.88 1. 0.88
|
|
0.92 0.92 0.96 0.84 ]
|
|
|
|
mean value: 0.9204615384615384
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.86956522 0.92307692 0.88 1. 0.88
|
|
0.91666667 0.91666667 0.96296296 0.84615385]
|
|
|
|
mean value: 0.919509228291837
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.85714286 0.84615385 1. 0.84615385
|
|
1. 1. 0.92857143 0.84615385]
|
|
|
|
mean value: 0.9324175824175824
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.76923077 1. 0.91666667 1. 0.91666667
|
|
0.84615385 0.84615385 1. 0.84615385]
|
|
|
|
mean value: 0.9141025641025641
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.88461538 0.92307692 0.88141026 1. 0.88141026
|
|
0.92307692 0.92307692 0.95833333 0.83974359]
|
|
|
|
mean value: 0.9214743589743589
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.76923077 0.85714286 0.78571429 1. 0.78571429
|
|
0.84615385 0.84615385 0.92857143 0.73333333]
|
|
|
|
mean value: 0.8552014652014652
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.10892153 0.10659099 0.09640145 0.102952 0.10315108 0.10590339
|
|
0.09663677 0.0991776 0.09782553 0.09687376]
|
|
|
|
mean value: 0.10144340991973877
|
|
|
|
key: score_time
|
|
value: [0.0192492 0.0177021 0.0176034 0.01901913 0.01889372 0.01901245
|
|
0.01884246 0.01873541 0.01843476 0.01731968]
|
|
|
|
mean value: 0.018481230735778807
|
|
|
|
key: test_mcc
|
|
value: [0.84615385 0.6172134 0.60001249 0.60001249 0.58489765 0.85163063
|
|
0.67948718 0.64907341 0.83974359 0.52297636]
|
|
|
|
mean value: 0.679120104312635
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92307692 0.80769231 0.8 0.8 0.76 0.92
|
|
0.84 0.8 0.92 0.76 ]
|
|
|
|
mean value: 0.833076923076923
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.92307692 0.81481481 0.7826087 0.7826087 0.66666667 0.92307692
|
|
0.84615385 0.83870968 0.92307692 0.78571429]
|
|
|
|
mean value: 0.8286507451304085
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.92307692 0.78571429 0.81818182 0.81818182 1. 0.85714286
|
|
0.84615385 0.72222222 0.92307692 0.73333333]
|
|
|
|
mean value: 0.8427084027084027
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.92307692 0.84615385 0.75 0.75 0.5 1.
|
|
0.84615385 1. 0.92307692 0.84615385]
|
|
|
|
mean value: 0.8384615384615385
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92307692 0.80769231 0.79807692 0.79807692 0.75 0.92307692
|
|
0.83974359 0.79166667 0.91987179 0.75641026]
|
|
|
|
mean value: 0.8307692307692308
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.85714286 0.6875 0.64285714 0.64285714 0.5 0.85714286
|
|
0.73333333 0.72222222 0.85714286 0.64705882]
|
|
|
|
mean value: 0.7147257236227824
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.68
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01059675 0.01052785 0.01050544 0.0105567 0.01037574 0.01049161
|
|
0.00929809 0.0093801 0.00963831 0.0094173 ]
|
|
|
|
mean value: 0.010078787803649902
|
|
|
|
key: score_time
|
|
value: [0.0094595 0.00944734 0.00946069 0.01046562 0.00942874 0.00871587
|
|
0.00866818 0.00864553 0.00857854 0.00864339]
|
|
|
|
mean value: 0.009151339530944824
|
|
|
|
key: test_mcc
|
|
value: [0.15430335 0.72760688 0.52297636 0.11613145 0.12179487 0.62103443
|
|
0.12179487 0.44702443 0.69033695 0.47075654]
|
|
|
|
mean value: 0.39937601251209925
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.57692308 0.84615385 0.76 0.56 0.56 0.8
|
|
0.56 0.72 0.84 0.72 ]
|
|
|
|
mean value: 0.6943076923076923
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.59259259 0.86666667 0.72727273 0.52173913 0.56 0.81481481
|
|
0.56 0.75862069 0.83333333 0.77419355]
|
|
|
|
mean value: 0.7009233503157186
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.57142857 0.76470588 0.8 0.54545455 0.53846154 0.73333333
|
|
0.58333333 0.6875 0.90909091 0.66666667]
|
|
|
|
mean value: 0.6799974780121839
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.61538462 1. 0.66666667 0.5 0.58333333 0.91666667
|
|
0.53846154 0.84615385 0.76923077 0.92307692]
|
|
|
|
mean value: 0.735897435897436
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.57692308 0.84615385 0.75641026 0.55769231 0.56089744 0.80448718
|
|
0.56089744 0.71474359 0.84294872 0.71153846]
|
|
|
|
mean value: 0.6932692307692307
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.42105263 0.76470588 0.57142857 0.35294118 0.38888889 0.6875
|
|
0.38888889 0.61111111 0.71428571 0.63157895]
|
|
|
|
mean value: 0.5532381812374072
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.35448647 1.29955506 1.27398157 1.28936672 1.28128052 1.29093218
|
|
1.28404951 1.30271602 1.28741145 1.2815156 ]
|
|
|
|
mean value: 1.2945295095443725
|
|
|
|
key: score_time
|
|
value: [0.08798695 0.09411907 0.08824182 0.08777428 0.09145427 0.08942056
|
|
0.08843136 0.09486628 0.09361506 0.09238458]
|
|
|
|
mean value: 0.09082942008972168
|
|
|
|
key: test_mcc
|
|
value: [1. 0.77151675 0.85163063 0.83974359 0.84983659 0.92307692
|
|
0.83974359 0.78062475 0.92259985 0.71400555]
|
|
|
|
mean value: 0.8492778208511577
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.88461538 0.92 0.92 0.92 0.96
|
|
0.92 0.88 0.96 0.84 ]
|
|
|
|
mean value: 0.9204615384615384
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.88 0.92307692 0.91666667 0.90909091 0.96
|
|
0.92307692 0.89655172 0.96296296 0.86666667]
|
|
|
|
mean value: 0.9238092775678982
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.91666667 0.85714286 0.91666667 1. 0.92307692
|
|
0.92307692 0.8125 0.92857143 0.76470588]
|
|
|
|
mean value: 0.9042407347554406
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.84615385 1. 0.91666667 0.83333333 1.
|
|
0.92307692 1. 1. 1. ]
|
|
|
|
mean value: 0.9519230769230769
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.88461538 0.92307692 0.91987179 0.91666667 0.96153846
|
|
0.91987179 0.875 0.95833333 0.83333333]
|
|
|
|
mean value: 0.9192307692307693
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.78571429 0.85714286 0.84615385 0.83333333 0.92307692
|
|
0.85714286 0.8125 0.92857143 0.76470588]
|
|
|
|
mean value: 0.8608341413488472
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.89632845 0.9019289 0.91927862 0.87275076 0.91468811 0.90702629
|
|
0.91999006 0.90315461 1.02326894 0.86213589]
|
|
|
|
mean value: 0.9120550632476807
|
|
|
|
key: score_time
|
|
value: [0.14403844 0.21291661 0.16876364 0.20621157 0.17649484 0.22455335
|
|
0.21757507 0.26872635 0.21775103 0.14345646]
|
|
|
|
mean value: 0.198048734664917
|
|
|
|
key: test_mcc
|
|
value: [1. 0.77151675 0.76282051 0.83974359 0.84983659 0.92307692
|
|
0.76130617 0.78062475 0.92259985 0.61382459]
|
|
|
|
mean value: 0.8225349713413062
|
|
|
|
key: train_mcc
|
|
value: [0.93985815 0.95668921 0.96535144 0.94771626 0.96535144 0.94845006
|
|
0.93083144 0.94847348 0.96536202 0.96536202]
|
|
|
|
mean value: 0.9533445514336655
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.88461538 0.88 0.92 0.92 0.96
|
|
0.88 0.88 0.96 0.8 ]
|
|
|
|
mean value: 0.9084615384615384
|
|
|
|
key: train_accuracy
|
|
value: [0.96902655 0.97787611 0.98237885 0.97356828 0.98237885 0.97356828
|
|
0.96475771 0.97356828 0.98237885 0.98237885]
|
|
|
|
mean value: 0.9761880628435539
|
|
|
|
key: test_fscore
|
|
value: [1. 0.88 0.88 0.91666667 0.90909091 0.96
|
|
0.88888889 0.89655172 0.96296296 0.82758621]
|
|
|
|
mean value: 0.912174735864391
|
|
|
|
key: train_fscore
|
|
value: [0.96995708 0.97835498 0.98275862 0.97413793 0.98275862 0.97435897
|
|
0.96551724 0.97413793 0.9826087 0.9826087 ]
|
|
|
|
mean value: 0.9767198770390951
|
|
|
|
key: test_precision
|
|
value: [1. 0.91666667 0.84615385 0.91666667 1. 0.92307692
|
|
0.85714286 0.8125 0.92857143 0.75 ]
|
|
|
|
mean value: 0.8950778388278389
|
|
|
|
key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
train_precision
|
|
value: [0.94166667 0.95762712 0.96610169 0.95762712 0.96610169 0.95
|
|
0.94117647 0.94957983 0.96581197 0.96581197]
|
|
|
|
mean value: 0.956150452793025
|
|
|
|
key: test_recall
|
|
value: [1. 0.84615385 0.91666667 0.91666667 0.83333333 1.
|
|
0.92307692 1. 1. 0.92307692]
|
|
|
|
mean value: 0.9358974358974359
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.99122807 1. 1.
|
|
0.99115044 1. 1. 1. ]
|
|
|
|
mean value: 0.9982378512653315
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.88461538 0.88141026 0.91987179 0.91666667 0.96153846
|
|
0.87820513 0.875 0.95833333 0.79487179]
|
|
|
|
mean value: 0.907051282051282
|
|
|
|
key: train_roc_auc
|
|
value: [0.96902655 0.97787611 0.98230088 0.97349014 0.98230088 0.97345133
|
|
0.96487347 0.97368421 0.98245614 0.98245614]
|
|
|
|
mean value: 0.9761915851575842
|
|
|
|
key: test_jcc
|
|
value: [1. 0.78571429 0.78571429 0.84615385 0.83333333 0.92307692
|
|
0.8 0.8125 0.92857143 0.70588235]
|
|
|
|
mean value: 0.8420946455505279
|
|
|
|
key: train_jcc
|
|
value: [0.94166667 0.95762712 0.96610169 0.94957983 0.96610169 0.95
|
|
0.93333333 0.94957983 0.96581197 0.96581197]
|
|
|
|
mean value: 0.9545614103964054
|
|
|
|
MCC on Blind test: 0.93
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02502823 0.01045775 0.01068568 0.01104236 0.01042438 0.01034069
|
|
0.01034737 0.01054955 0.01050019 0.01061559]
|
|
|
|
mean value: 0.011999177932739257
|
|
|
|
key: score_time
|
|
value: [0.01275039 0.00938463 0.00985813 0.00967574 0.00953794 0.00954342
|
|
0.00956988 0.00959826 0.00953984 0.00962615]
|
|
|
|
mean value: 0.009908437728881836
|
|
|
|
key: test_mcc
|
|
value: [0.6172134 0.46291005 0.67948718 0.78446454 0.5423696 0.35897436
|
|
0.76282051 0.37073365 0.6025641 0.61382459]
|
|
|
|
mean value: 0.5795361981703262
|
|
|
|
key: train_mcc
|
|
value: [0.73636096 0.72912657 0.73844914 0.72075193 0.71535838 0.71845889
|
|
0.7237303 0.76240393 0.72715275 0.7459772 ]
|
|
|
|
mean value: 0.7317770046503951
|
|
|
|
key: test_accuracy
|
|
value: [0.80769231 0.73076923 0.84 0.88 0.76 0.68
|
|
0.88 0.68 0.8 0.8 ]
|
|
|
|
mean value: 0.7858461538461539
|
|
|
|
key: train_accuracy
|
|
value: [0.86725664 0.86283186 0.86784141 0.85903084 0.85462555 0.85903084
|
|
0.85903084 0.88105727 0.86343612 0.8722467 ]
|
|
|
|
mean value: 0.8646388055046587
|
|
|
|
key: test_fscore
|
|
value: [0.81481481 0.74074074 0.83333333 0.88888889 0.7 0.66666667
|
|
0.88 0.73333333 0.8 0.82758621]
|
|
|
|
mean value: 0.788536398467433
|
|
|
|
key: train_fscore
|
|
value: [0.87179487 0.86919831 0.87394958 0.86554622 0.86419753 0.86206897
|
|
0.86666667 0.88209607 0.86462882 0.87553648]
|
|
|
|
mean value: 0.8695683516914982
|
|
|
|
key: test_precision
|
|
value: [0.78571429 0.71428571 0.83333333 0.8 0.875 0.66666667
|
|
0.91666667 0.64705882 0.83333333 0.75 ]
|
|
|
|
mean value: 0.7822058823529412
|
|
|
|
key: train_precision
|
|
value: [0.84297521 0.83064516 0.83870968 0.83064516 0.81395349 0.84745763
|
|
0.81889764 0.87068966 0.85344828 0.85 ]
|
|
|
|
mean value: 0.8397421890932065
|
|
|
|
key: test_recall
|
|
value: [0.84615385 0.76923077 0.83333333 1. 0.58333333 0.66666667
|
|
0.84615385 0.84615385 0.76923077 0.92307692]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_recall
|
|
value: [0.90265487 0.91150442 0.9122807 0.90350877 0.92105263 0.87719298
|
|
0.92035398 0.89380531 0.87610619 0.90265487]
|
|
|
|
mean value: 0.9021114733736997
|
|
|
|
key: test_roc_auc
|
|
value: [0.80769231 0.73076923 0.83974359 0.88461538 0.75320513 0.67948718
|
|
0.88141026 0.67307692 0.80128205 0.79487179]
|
|
|
|
mean value: 0.7846153846153846
|
|
|
|
key: train_roc_auc
|
|
value: [0.86725664 0.86283186 0.86764478 0.85883403 0.85433163 0.85895047
|
|
0.8592998 0.88111318 0.86349169 0.87238007]
|
|
|
|
mean value: 0.8646134140661388
|
|
|
|
key: test_jcc
|
|
value: [0.6875 0.58823529 0.71428571 0.8 0.53846154 0.5
|
|
0.78571429 0.57894737 0.66666667 0.70588235]
|
|
|
|
mean value: 0.6565693220608081
|
|
|
|
key: train_jcc
|
|
value: [0.77272727 0.76865672 0.7761194 0.76296296 0.76086957 0.75757576
|
|
0.76470588 0.7890625 0.76153846 0.77862595]
|
|
|
|
mean value: 0.7692844475976246
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.08878231 0.05778432 0.05651474 0.05532408 0.05668736 0.05681205
|
|
0.06316423 0.05669594 0.05545926 0.0568428 ]
|
|
|
|
mean value: 0.06040670871734619
|
|
|
|
key: score_time
|
|
value: [0.01091409 0.01069617 0.01028657 0.01098132 0.01100111 0.0105865
|
|
0.01048779 0.01047587 0.01105118 0.01130819]
|
|
|
|
mean value: 0.01077888011932373
|
|
|
|
key: test_mcc
|
|
value: [1. 0.9258201 0.85163063 0.92307692 1. 0.92307692
|
|
0.92307692 0.92307692 0.92259985 0.67948718]
|
|
|
|
mean value: 0.9071845445010424
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.96153846 0.92 0.96 1. 0.96
|
|
0.96 0.96 0.96 0.84 ]
|
|
|
|
mean value: 0.9521538461538461
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.96 0.92307692 0.96 1. 0.96
|
|
0.96 0.96 0.96296296 0.84615385]
|
|
|
|
mean value: 0.9532193732193732
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.85714286 0.92307692 1. 0.92307692
|
|
1. 1. 0.92857143 0.84615385]
|
|
|
|
mean value: 0.9478021978021978
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.92307692 1. 1. 1. 1.
|
|
0.92307692 0.92307692 1. 0.84615385]
|
|
|
|
mean value: 0.9615384615384616
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.96153846 0.92307692 0.96153846 1. 0.96153846
|
|
0.96153846 0.96153846 0.95833333 0.83974359]
|
|
|
|
mean value: 0.9528846153846154
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.92307692 0.85714286 0.92307692 1. 0.92307692
|
|
0.92307692 0.92307692 0.92857143 0.73333333]
|
|
|
|
mean value: 0.9134432234432235
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03367066 0.0482018 0.02775931 0.04978204 0.0281558 0.02796936
|
|
0.0282259 0.05571699 0.05939174 0.05379438]
|
|
|
|
mean value: 0.041266798973083496
|
|
|
|
key: score_time
|
|
value: [0.0248487 0.01226282 0.01609468 0.0121429 0.01205301 0.01208568
|
|
0.01223707 0.02072048 0.02426624 0.03994894]
|
|
|
|
mean value: 0.01866605281829834
|
|
|
|
key: test_mcc
|
|
value: [0.9258201 0.6172134 0.85163063 0.68640647 0.6025641 0.76282051
|
|
0.83974359 0.83974359 0.6025641 0.6025641 ]
|
|
|
|
mean value: 0.7331070599857203
|
|
|
|
key: train_mcc
|
|
value: [0.99118926 1. 0.99122739 1. 0.99122739 0.99122739
|
|
0.99122807 0.99122807 0.98237851 0.98253242]
|
|
|
|
mean value: 0.9912238500106323
|
|
|
|
key: test_accuracy
|
|
value: [0.96153846 0.80769231 0.92 0.84 0.8 0.88
|
|
0.92 0.92 0.8 0.8 ]
|
|
|
|
mean value: 0.8649230769230769
|
|
|
|
key: train_accuracy
|
|
value: [0.99557522 1. 0.99559471 1. 0.99559471 0.99559471
|
|
0.99559471 0.99559471 0.99118943 0.99118943]
|
|
|
|
mean value: 0.9955927644146427
|
|
|
|
key: test_fscore
|
|
value: [0.96 0.81481481 0.92307692 0.81818182 0.8 0.88
|
|
0.92307692 0.92307692 0.8 0.8 ]
|
|
|
|
mean value: 0.8642227402227403
|
|
|
|
key: train_fscore
|
|
value: [0.99559471 1. 0.99563319 1. 0.99563319 0.99563319
|
|
0.99559471 0.99559471 0.99115044 0.99122807]
|
|
|
|
mean value: 0.9956062216941255
|
|
|
|
key: test_precision
|
|
value: [1. 0.78571429 0.85714286 0.9 0.76923077 0.84615385
|
|
0.92307692 0.92307692 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8671062271062271
|
|
|
|
key: train_precision
|
|
value: [0.99122807 1. 0.99130435 1. 0.99130435 0.99130435
|
|
0.99122807 0.99122807 0.99115044 0.9826087 ]
|
|
|
|
mean value: 0.9921356392134627
|
|
|
|
key: test_recall
|
|
value: [0.92307692 0.84615385 1. 0.75 0.83333333 0.91666667
|
|
0.92307692 0.92307692 0.76923077 0.76923077]
|
|
|
|
mean value: 0.8653846153846154
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 0.99115044 1. ]
|
|
|
|
mean value: 0.9991150442477876
|
|
|
|
key: test_roc_auc
|
|
value: [0.96153846 0.80769231 0.92307692 0.83653846 0.80128205 0.88141026
|
|
0.91987179 0.91987179 0.80128205 0.80128205]
|
|
|
|
mean value: 0.8653846153846154
|
|
|
|
key: train_roc_auc
|
|
value: [0.99557522 1. 0.99557522 1. 0.99557522 0.99557522
|
|
0.99561404 0.99561404 0.99118926 0.99122807]
|
|
|
|
mean value: 0.9955946281633287
|
|
|
|
key: test_jcc
|
|
value: [0.92307692 0.6875 0.85714286 0.69230769 0.66666667 0.78571429
|
|
0.85714286 0.85714286 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7660027472527472
|
|
|
|
key: train_jcc
|
|
value: [0.99122807 1. 0.99130435 1. 0.99130435 0.99130435
|
|
0.99122807 0.99122807 0.98245614 0.9826087 ]
|
|
|
|
mean value: 0.9912662090007628
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02116728 0.0095849 0.01030636 0.00906777 0.00928402 0.01041389
|
|
0.00919175 0.00998807 0.00985909 0.01014471]
|
|
|
|
mean value: 0.010900783538818359
|
|
|
|
key: score_time
|
|
value: [0.01014805 0.00926661 0.0097146 0.00870943 0.00872517 0.00941133
|
|
0.00859666 0.008569 0.00886679 0.00917649]
|
|
|
|
mean value: 0.009118413925170899
|
|
|
|
key: test_mcc
|
|
value: [0.85634884 0.53846154 0.44230769 0.69033695 0.61382459 0.55377653
|
|
0.51923077 0.5423696 0.76282051 0.27742513]
|
|
|
|
mean value: 0.5796902147743396
|
|
|
|
key: train_mcc
|
|
value: [0.63919874 0.67706181 0.66674713 0.64907619 0.65833543 0.65754007
|
|
0.60653867 0.63945016 0.61245245 0.71912694]
|
|
|
|
mean value: 0.6525527594685795
|
|
|
|
key: test_accuracy
|
|
value: [0.92307692 0.76923077 0.72 0.84 0.8 0.76
|
|
0.76 0.76 0.88 0.64 ]
|
|
|
|
mean value: 0.7852307692307693
|
|
|
|
key: train_accuracy
|
|
value: [0.81858407 0.83628319 0.83259912 0.82378855 0.82819383 0.82819383
|
|
0.80176211 0.81938326 0.8061674 0.85903084]
|
|
|
|
mean value: 0.8253986199368446
|
|
|
|
key: test_fscore
|
|
value: [0.92857143 0.76923077 0.72 0.84615385 0.76190476 0.78571429
|
|
0.76923077 0.8 0.88 0.66666667]
|
|
|
|
mean value: 0.7927472527472528
|
|
|
|
key: train_fscore
|
|
value: [0.82553191 0.84518828 0.83898305 0.83050847 0.83544304 0.83404255
|
|
0.81012658 0.82251082 0.80701754 0.86206897]
|
|
|
|
mean value: 0.8311421230168541
|
|
|
|
key: test_precision
|
|
value: [0.86666667 0.76923077 0.69230769 0.78571429 0.88888889 0.6875
|
|
0.76923077 0.70588235 0.91666667 0.64285714]
|
|
|
|
mean value: 0.7724945234504058
|
|
|
|
key: train_precision
|
|
value: [0.79508197 0.8015873 0.81147541 0.80327869 0.80487805 0.80991736
|
|
0.77419355 0.80508475 0.8 0.84033613]
|
|
|
|
mean value: 0.8045833199917051
|
|
|
|
key: test_recall
|
|
value: [1. 0.76923077 0.75 0.91666667 0.66666667 0.91666667
|
|
0.76923077 0.92307692 0.84615385 0.69230769]
|
|
|
|
mean value: 0.825
|
|
|
|
key: train_recall
|
|
value: [0.85840708 0.89380531 0.86842105 0.85964912 0.86842105 0.85964912
|
|
0.84955752 0.84070796 0.81415929 0.88495575]
|
|
|
|
mean value: 0.8597733271231175
|
|
|
|
key: test_roc_auc
|
|
value: [0.92307692 0.76923077 0.72115385 0.84294872 0.79487179 0.76602564
|
|
0.75961538 0.75320513 0.88141026 0.63782051]
|
|
|
|
mean value: 0.7849358974358974
|
|
|
|
key: train_roc_auc
|
|
value: [0.81858407 0.83628319 0.83244061 0.82362987 0.82801584 0.82805465
|
|
0.80197174 0.81947679 0.80620245 0.85914454]
|
|
|
|
mean value: 0.8253803757180562
|
|
|
|
key: test_jcc
|
|
value: [0.86666667 0.625 0.5625 0.73333333 0.61538462 0.64705882
|
|
0.625 0.66666667 0.78571429 0.5 ]
|
|
|
|
mean value: 0.662732439129498
|
|
|
|
key: train_jcc
|
|
value: [0.70289855 0.73188406 0.72262774 0.71014493 0.7173913 0.71532847
|
|
0.68085106 0.69852941 0.67647059 0.75757576]
|
|
|
|
mean value: 0.7113701866364817
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01269698 0.01918936 0.01562309 0.01837087 0.01782084 0.01832843
|
|
0.01765776 0.01705146 0.01595354 0.0162549 ]
|
|
|
|
mean value: 0.016894721984863283
|
|
|
|
key: score_time
|
|
value: [0.00963902 0.01174426 0.01185369 0.01180768 0.01178145 0.01209712
|
|
0.01181507 0.01191664 0.01179671 0.01185918]
|
|
|
|
mean value: 0.011631083488464356
|
|
|
|
key: test_mcc
|
|
value: [0.79056942 0.70064905 0.58489765 0.76282051 0.78062475 0.76282051
|
|
0.72057669 0.76130617 0.69033695 0.37073365]
|
|
|
|
mean value: 0.6925335348409203
|
|
|
|
key: train_mcc
|
|
value: [0.76912242 0.95608921 0.46046433 0.98252973 0.94773232 0.93010262
|
|
0.73383073 0.94773232 0.87558721 0.90739121]
|
|
|
|
mean value: 0.8510582076931191
|
|
|
|
key: test_accuracy
|
|
value: [0.88461538 0.84615385 0.76 0.88 0.88 0.88
|
|
0.84 0.88 0.84 0.68 ]
|
|
|
|
mean value: 0.837076923076923
|
|
|
|
key: train_accuracy
|
|
value: [0.87168142 0.97787611 0.67400881 0.99118943 0.97356828 0.96475771
|
|
0.85022026 0.97356828 0.9339207 0.95154185]
|
|
|
|
mean value: 0.916233285252037
|
|
|
|
key: test_fscore
|
|
value: [0.89655172 0.83333333 0.66666667 0.88 0.85714286 0.88
|
|
0.81818182 0.88888889 0.83333333 0.73333333]
|
|
|
|
mean value: 0.8287431955018162
|
|
|
|
key: train_fscore
|
|
value: [0.88627451 0.97757848 0.51948052 0.99130435 0.97321429 0.96428571
|
|
0.82291667 0.97391304 0.93775934 0.9535865 ]
|
|
|
|
mean value: 0.9000313396581658
|
|
|
|
key: test_precision
|
|
value: [0.8125 0.90909091 1. 0.84615385 1. 0.84615385
|
|
1. 0.85714286 0.90909091 0.64705882]
|
|
|
|
mean value: 0.882719119116178
|
|
|
|
key: train_precision
|
|
value: [0.79577465 0.99090909 1. 0.98275862 0.99090909 0.98181818
|
|
1. 0.95726496 0.8828125 0.91129032]
|
|
|
|
mean value: 0.9493537412058946
|
|
|
|
key: test_recall
|
|
value: [1. 0.76923077 0.5 0.91666667 0.75 0.91666667
|
|
0.69230769 0.92307692 0.76923077 0.84615385]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 0.96460177 0.35087719 1. 0.95614035 0.94736842
|
|
0.69911504 0.99115044 1. 1. ]
|
|
|
|
mean value: 0.8909253221549449
|
|
|
|
key: test_roc_auc
|
|
value: [0.88461538 0.84615385 0.75 0.88141026 0.875 0.88141026
|
|
0.84615385 0.87820513 0.84294872 0.67307692]
|
|
|
|
mean value: 0.8358974358974358
|
|
|
|
key: train_roc_auc
|
|
value: [0.87168142 0.97787611 0.6754386 0.99115044 0.9736454 0.96483465
|
|
0.84955752 0.9736454 0.93421053 0.95175439]
|
|
|
|
mean value: 0.9163794441856854
|
|
|
|
key: test_jcc
|
|
value: [0.8125 0.71428571 0.5 0.78571429 0.75 0.78571429
|
|
0.69230769 0.8 0.71428571 0.57894737]
|
|
|
|
mean value: 0.7133755060728745
|
|
|
|
key: train_jcc
|
|
value: [0.79577465 0.95614035 0.35087719 0.98275862 0.94782609 0.93103448
|
|
0.69911504 0.94915254 0.8828125 0.91129032]
|
|
|
|
mean value: 0.8406781791353085
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0155251 0.01598763 0.01443887 0.01699662 0.01500106 0.01418018
|
|
0.01515698 0.01546717 0.01587415 0.01561856]
|
|
|
|
mean value: 0.015424633026123047
|
|
|
|
key: score_time
|
|
value: [0.01209092 0.01199055 0.01183677 0.01198864 0.01184702 0.01189756
|
|
0.01212049 0.01184225 0.01188421 0.01181602]
|
|
|
|
mean value: 0.011931443214416504
|
|
|
|
key: test_mcc
|
|
value: [0.85634884 0.47434165 0.21245915 0.47075654 0.84983659 0.6025641
|
|
0.69033695 0.58489765 0.65908204 0.35243833]
|
|
|
|
mean value: 0.5753061835987423
|
|
|
|
key: train_mcc
|
|
value: [0.78376381 0.81348922 0.14941538 0.87558721 0.93836361 0.7379234
|
|
0.90590258 0.68546907 0.89127422 0.67761043]
|
|
|
|
mean value: 0.7458798925132408
|
|
|
|
key: test_accuracy
|
|
value: [0.92307692 0.73076923 0.56 0.72 0.92 0.8
|
|
0.84 0.76 0.8 0.64 ]
|
|
|
|
mean value: 0.7693846153846154
|
|
|
|
key: train_accuracy
|
|
value: [0.88053097 0.89823009 0.51982379 0.9339207 0.969163 0.85903084
|
|
0.95154185 0.81938326 0.94273128 0.81497797]
|
|
|
|
mean value: 0.8589333749171573
|
|
|
|
key: test_fscore
|
|
value: [0.92857143 0.75862069 0.15384615 0.63157895 0.90909091 0.8
|
|
0.83333333 0.8125 0.76190476 0.52631579]
|
|
|
|
mean value: 0.7115762013243865
|
|
|
|
key: train_fscore
|
|
value: [0.89328063 0.90763052 0.08403361 0.92957746 0.969163 0.84158416
|
|
0.94930876 0.84644195 0.93896714 0.77173913]
|
|
|
|
mean value: 0.8131726356655016
|
|
|
|
key: test_precision
|
|
value: [0.86666667 0.6875 1. 0.85714286 1. 0.76923077
|
|
0.90909091 0.68421053 1. 0.83333333]
|
|
|
|
mean value: 0.8607175061780326
|
|
|
|
key: train_precision
|
|
value: [0.80714286 0.83088235 1. 1. 0.97345133 0.96590909
|
|
0.99038462 0.73376623 1. 1. ]
|
|
|
|
mean value: 0.9301536477577602
|
|
|
|
key: test_recall
|
|
value: [1. 0.84615385 0.08333333 0.5 0.83333333 0.83333333
|
|
0.76923077 1. 0.61538462 0.38461538]
|
|
|
|
mean value: 0.6865384615384615
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.04385965 0.86842105 0.96491228 0.74561404
|
|
0.91150442 1. 0.88495575 0.62831858]
|
|
|
|
mean value: 0.8047585778605807
|
|
|
|
key: test_roc_auc
|
|
value: [0.92307692 0.73076923 0.54166667 0.71153846 0.91666667 0.80128205
|
|
0.84294872 0.75 0.80769231 0.65064103]
|
|
|
|
mean value: 0.7676282051282052
|
|
|
|
key: train_roc_auc
|
|
value: [0.88053097 0.89823009 0.52192982 0.93421053 0.9691818 0.85953268
|
|
0.95136625 0.82017544 0.94247788 0.81415929]
|
|
|
|
mean value: 0.8591794752367644
|
|
|
|
key: test_jcc
|
|
value: [0.86666667 0.61111111 0.08333333 0.46153846 0.83333333 0.66666667
|
|
0.71428571 0.68421053 0.61538462 0.35714286]
|
|
|
|
mean value: 0.5893673285778549
|
|
|
|
key: train_jcc
|
|
value: [0.80714286 0.83088235 0.04385965 0.86842105 0.94017094 0.72649573
|
|
0.90350877 0.73376623 0.88495575 0.62831858]
|
|
|
|
mean value: 0.736752192048433
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.14452982 0.12118196 0.1191287 0.11694002 0.1201756 0.11760974
|
|
0.11887145 0.11942744 0.11746311 0.11708069]
|
|
|
|
mean value: 0.12124085426330566
|
|
|
|
key: score_time
|
|
value: [0.01630163 0.0148561 0.01473999 0.01513076 0.01487112 0.01480365
|
|
0.0148561 0.01448178 0.01459241 0.01462054]
|
|
|
|
mean value: 0.014925408363342284
|
|
|
|
key: test_mcc
|
|
value: [1. 0.9258201 0.85163063 0.85163063 1. 0.92307692
|
|
1. 0.83974359 0.92259985 0.60001249]
|
|
|
|
mean value: 0.8914514200671246
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.96153846 0.92 0.92 1. 0.96
|
|
1. 0.92 0.96 0.8 ]
|
|
|
|
mean value: 0.9441538461538461
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.96 0.92307692 0.92307692 1. 0.96
|
|
1. 0.92307692 0.96296296 0.81481481]
|
|
|
|
mean value: 0.9467008547008547
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.85714286 0.85714286 1. 0.92307692
|
|
1. 0.92307692 0.92857143 0.78571429]
|
|
|
|
mean value: 0.9274725274725275
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.92307692 1. 1. 1. 1.
|
|
1. 0.92307692 1. 0.84615385]
|
|
|
|
mean value: 0.9692307692307692
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.96153846 0.92307692 0.92307692 1. 0.96153846
|
|
1. 0.91987179 0.95833333 0.79807692]
|
|
|
|
mean value: 0.944551282051282
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.92307692 0.85714286 0.85714286 1. 0.92307692
|
|
1. 0.85714286 0.92857143 0.6875 ]
|
|
|
|
mean value: 0.9033653846153846
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.85
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04192924 0.03615928 0.03852701 0.04099917 0.0587132 0.04634476
|
|
0.05016923 0.05863547 0.05693626 0.03817296]
|
|
|
|
mean value: 0.046658658981323244
|
|
|
|
key: score_time
|
|
value: [0.02326035 0.01884866 0.02666903 0.0237236 0.02830863 0.01741791
|
|
0.03411555 0.02771926 0.02965069 0.02110672]
|
|
|
|
mean value: 0.025082039833068847
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 0.76282051 0.76282051 1. 0.76282051
|
|
0.92307692 1. 0.92259985 0.67948718]
|
|
|
|
mean value: 0.8813625487216001
|
|
|
|
key: train_mcc
|
|
value: [0.98230088 1. 1. 0.99122739 1. 0.99122739
|
|
0.99122739 0.98252973 1. 1. ]
|
|
|
|
mean value: 0.9938512787617281
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 0.88 0.88 1. 0.88 0.96 1. 0.96 0.84]
|
|
|
|
mean value: 0.94
|
|
|
|
key: train_accuracy
|
|
value: [0.99115044 1. 1. 0.99559471 1. 0.99559471
|
|
0.99559471 0.99118943 1. 1. ]
|
|
|
|
mean value: 0.9969124010759814
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 0.88 0.88 1. 0.88
|
|
0.96 1. 0.96296296 0.84615385]
|
|
|
|
mean value: 0.9409116809116809
|
|
|
|
key: train_fscore
|
|
value: [0.99115044 1. 1. 0.99563319 1. 0.99563319
|
|
0.99555556 0.99107143 1. 1. ]
|
|
|
|
mean value: 0.9969043802150712
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.84615385 0.84615385 1. 0.84615385
|
|
1. 1. 0.92857143 0.84615385]
|
|
|
|
mean value: 0.9313186813186813
|
|
|
|
key: train_precision
|
|
value: [0.99115044 1. 1. 0.99130435 1. 0.99130435
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.997375913813005
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.91666667 0.91666667 1. 0.91666667
|
|
0.92307692 1. 1. 0.84615385]
|
|
|
|
mean value: 0.9519230769230769
|
|
|
|
key: train_recall
|
|
value: [0.99115044 1. 1. 1. 1. 1.
|
|
0.99115044 0.98230088 1. 1. ]
|
|
|
|
mean value: 0.9964601769911504
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 0.88141026 0.88141026 1. 0.88141026
|
|
0.96153846 1. 0.95833333 0.83974359]
|
|
|
|
mean value: 0.9403846153846154
|
|
|
|
key: train_roc_auc
|
|
value: [0.99115044 1. 1. 0.99557522 1. 0.99557522
|
|
0.99557522 0.99115044 1. 1. ]
|
|
|
|
mean value: 0.9969026548672566
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 0.78571429 0.78571429 1. 0.78571429
|
|
0.92307692 1. 0.92857143 0.73333333]
|
|
|
|
mean value: 0.8942124542124542
|
|
|
|
key: train_jcc
|
|
value: [0.98245614 1. 1. 0.99130435 1. 0.99130435
|
|
0.99115044 0.98230088 1. 1. ]
|
|
|
|
mean value: 0.9938516163436679
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04811549 0.04675889 0.08320355 0.07460022 0.07616639 0.06692505
|
|
0.07287598 0.06743908 0.07123065 0.08591771]
|
|
|
|
mean value: 0.06932330131530762
|
|
|
|
key: score_time
|
|
value: [0.02174902 0.01813269 0.02435803 0.02427435 0.02578211 0.02325583
|
|
0.02110767 0.02467132 0.02283382 0.0238924 ]
|
|
|
|
mean value: 0.02300572395324707
|
|
|
|
key: test_mcc
|
|
value: [0.46291005 0.38924947 0.37073365 0.60001249 0.71400555 0.62103443
|
|
0.51923077 0.28022427 0.51923077 0.52297636]
|
|
|
|
mean value: 0.4999607802128746
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.73076923 0.69230769 0.68 0.8 0.84 0.8
|
|
0.76 0.64 0.76 0.76 ]
|
|
|
|
mean value: 0.7463076923076923
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.72 0.71428571 0.6 0.7826087 0.8 0.81481481
|
|
0.76923077 0.68965517 0.76923077 0.78571429]
|
|
|
|
mean value: 0.7445540221342319
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.66666667 0.75 0.81818182 1. 0.73333333
|
|
0.76923077 0.625 0.76923077 0.73333333]
|
|
|
|
mean value: 0.761497668997669
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.69230769 0.76923077 0.5 0.75 0.66666667 0.91666667
|
|
0.76923077 0.76923077 0.76923077 0.84615385]
|
|
|
|
mean value: 0.7448717948717949
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.73076923 0.69230769 0.67307692 0.79807692 0.83333333 0.80448718
|
|
0.75961538 0.63461538 0.75961538 0.75641026]
|
|
|
|
mean value: 0.7442307692307693
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5625 0.55555556 0.42857143 0.64285714 0.66666667 0.6875
|
|
0.625 0.52631579 0.625 0.64705882]
|
|
|
|
mean value: 0.596702540665389
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.38953614 0.3836174 0.3813796 0.37001133 0.37968659 0.37475681
|
|
0.37812352 0.38004684 0.37554431 0.39142656]
|
|
|
|
mean value: 0.3804129123687744
|
|
|
|
key: score_time
|
|
value: [0.0094707 0.00989366 0.00984287 0.00936651 0.01023889 0.00987649
|
|
0.00988579 0.00898886 0.01000381 0.00975585]
|
|
|
|
mean value: 0.009732341766357422
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 0.85163063 0.83974359 1. 0.85163063
|
|
0.92307692 0.92307692 0.92259985 0.67948718]
|
|
|
|
mean value: 0.8991245716080256
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 0.92 0.92 1. 0.92 0.96 0.96 0.96 0.84]
|
|
|
|
mean value: 0.948
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 0.92307692 0.91666667 1. 0.92307692
|
|
0.96 0.96 0.96296296 0.84615385]
|
|
|
|
mean value: 0.9491937321937322
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.85714286 0.91666667 1. 0.85714286
|
|
1. 1. 0.92857143 0.84615385]
|
|
|
|
mean value: 0.9405677655677656
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.91666667 1. 1.
|
|
0.92307692 0.92307692 1. 0.84615385]
|
|
|
|
mean value: 0.9608974358974359
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 0.92307692 0.91987179 1. 0.92307692
|
|
0.96153846 0.96153846 0.95833333 0.83974359]
|
|
|
|
mean value: 0.9487179487179487
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 0.85714286 0.84615385 1. 0.85714286
|
|
0.92307692 0.92307692 0.92857143 0.73333333]
|
|
|
|
mean value: 0.9068498168498168
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.87
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02108741 0.02180362 0.02211237 0.0221622 0.02169275 0.0905633
|
|
0.09792352 0.02245069 0.02209115 0.02490425]
|
|
|
|
mean value: 0.03667912483215332
|
|
|
|
key: score_time
|
|
value: [0.01709557 0.01316833 0.0123136 0.01393843 0.0167563 0.01342654
|
|
0.02364779 0.02040696 0.01349974 0.01250529]
|
|
|
|
mean value: 0.015675854682922364
|
|
|
|
key: test_mcc
|
|
value: [ 0.38924947 -0.18257419 0.20645591 0.51923077 0.27742513 0.22017621
|
|
0.28022427 -0.1441742 0.44702443 0.04516223]
|
|
|
|
mean value: 0.20582000368846934
|
|
|
|
key: train_mcc
|
|
value: [1. 0.99118926 1. 0.96535144 0.98252973 0.97390505
|
|
1. 0.99122807 0.99122807 0.94847348]
|
|
|
|
mean value: 0.9843905090333227
|
|
|
|
key: test_accuracy
|
|
value: [0.69230769 0.42307692 0.6 0.76 0.64 0.6
|
|
0.64 0.44 0.72 0.52 ]
|
|
|
|
mean value: 0.6035384615384616
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.99557522 1. 0.98237885 0.99118943 0.98678414
|
|
1. 0.99559471 0.99559471 0.97356828]
|
|
|
|
mean value: 0.9920685353397528
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.54545455 0.61538462 0.75 0.60869565 0.64285714
|
|
0.68965517 0.53333333 0.75862069 0.5 ]
|
|
|
|
mean value: 0.6358286865558229
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99559471 1. 0.98275862 0.99130435 0.98701299
|
|
1. 0.99559471 0.99559471 0.97413793]
|
|
|
|
mean value: 0.9921998027532375
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.45 0.57142857 0.75 0.63636364 0.5625
|
|
0.625 0.47058824 0.6875 0.54545455]
|
|
|
|
mean value: 0.5965501655207538
|
|
|
|
key: train_precision
|
|
value: [1. 0.99122807 1. 0.96610169 0.98275862 0.97435897
|
|
1. 0.99122807 0.99122807 0.94957983]
|
|
|
|
mean value: 0.9846483332422973
|
|
|
|
key: test_recall
|
|
value: [0.76923077 0.69230769 0.66666667 0.75 0.58333333 0.75
|
|
0.76923077 0.61538462 0.84615385 0.46153846]
|
|
|
|
mean value: 0.6903846153846154
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.69230769 0.42307692 0.6025641 0.75961538 0.63782051 0.60576923
|
|
0.63461538 0.43269231 0.71474359 0.5224359 ]
|
|
|
|
mean value: 0.6025641025641025
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99557522 1. 0.98230088 0.99115044 0.98672566
|
|
1. 0.99561404 0.99561404 0.97368421]
|
|
|
|
mean value: 0.9920664493091135
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.375 0.44444444 0.6 0.4375 0.47368421
|
|
0.52631579 0.36363636 0.61111111 0.33333333]
|
|
|
|
mean value: 0.4720580808080808
|
|
|
|
key: train_jcc
|
|
value: [1. 0.99122807 1. 0.96610169 0.98275862 0.97435897
|
|
1. 0.99122807 0.99122807 0.94957983]
|
|
|
|
mean value: 0.9846483332422973
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01482725 0.02418423 0.03495097 0.03472853 0.03881168 0.03449345
|
|
0.03447318 0.04217696 0.05941725 0.05247021]
|
|
|
|
mean value: 0.037053370475769044
|
|
|
|
key: score_time
|
|
value: [0.01207805 0.023211 0.02392936 0.02399015 0.0239594 0.02243161
|
|
0.02000022 0.02241969 0.0231111 0.02405715]
|
|
|
|
mean value: 0.021918773651123047
|
|
|
|
key: test_mcc
|
|
value: [0.9258201 0.69230769 0.85163063 0.83974359 0.92259985 0.85163063
|
|
0.92307692 1. 0.83974359 0.67948718]
|
|
|
|
mean value: 0.8526040174827166
|
|
|
|
key: train_mcc
|
|
value: [0.92212497 0.93897274 0.93922501 0.95627534 0.94845006 0.93008108
|
|
0.93924826 0.92248004 0.92161308 0.95688345]
|
|
|
|
mean value: 0.9375354038998079
|
|
|
|
key: test_accuracy
|
|
value: [0.96153846 0.84615385 0.92 0.92 0.96 0.92
|
|
0.96 1. 0.92 0.84 ]
|
|
|
|
mean value: 0.9247692307692308
|
|
|
|
key: train_accuracy
|
|
value: [0.96017699 0.96902655 0.969163 0.97797357 0.97356828 0.96475771
|
|
0.969163 0.96035242 0.96035242 0.97797357]
|
|
|
|
mean value: 0.9682507504580719
|
|
|
|
key: test_fscore
|
|
value: [0.96 0.84615385 0.92307692 0.91666667 0.95652174 0.92307692
|
|
0.96 1. 0.92307692 0.84615385]
|
|
|
|
mean value: 0.9254726867335563
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_8020.py:168: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_8020.py:171: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.96137339 0.96969697 0.96995708 0.97835498 0.97435897 0.96551724
|
|
0.96969697 0.96137339 0.96103896 0.97835498]
|
|
|
|
mean value: 0.9689722935542087
|
|
|
|
key: test_precision
|
|
value: [1. 0.84615385 0.85714286 0.91666667 1. 0.85714286
|
|
1. 1. 0.92307692 0.84615385]
|
|
|
|
mean value: 0.9246336996336997
|
|
|
|
key: train_precision
|
|
value: [0.93333333 0.94915254 0.94957983 0.96581197 0.95 0.94915254
|
|
0.94915254 0.93333333 0.94067797 0.95762712]
|
|
|
|
mean value: 0.9477821176275812
|
|
|
|
key: test_recall
|
|
value: [0.92307692 0.84615385 1. 0.91666667 0.91666667 1.
|
|
0.92307692 1. 0.92307692 0.84615385]
|
|
|
|
mean value: 0.9294871794871795
|
|
|
|
key: train_recall
|
|
value: [0.99115044 0.99115044 0.99122807 0.99122807 1. 0.98245614
|
|
0.99115044 0.99115044 0.98230088 1. ]
|
|
|
|
mean value: 0.9911814935569011
|
|
|
|
key: test_roc_auc
|
|
value: [0.96153846 0.84615385 0.92307692 0.91987179 0.95833333 0.92307692
|
|
0.96153846 1. 0.91987179 0.83974359]
|
|
|
|
mean value: 0.9253205128205129
|
|
|
|
key: train_roc_auc
|
|
value: [0.96017699 0.96902655 0.96906536 0.97791492 0.97345133 0.9646794
|
|
0.96925943 0.9604875 0.96044869 0.97807018]
|
|
|
|
mean value: 0.9682580344666977
|
|
|
|
key: test_jcc
|
|
value: [0.92307692 0.73333333 0.85714286 0.84615385 0.91666667 0.85714286
|
|
0.92307692 1. 0.85714286 0.73333333]
|
|
|
|
mean value: 0.8647069597069597
|
|
|
|
key: train_jcc
|
|
value: [0.92561983 0.94117647 0.94166667 0.95762712 0.95 0.93333333
|
|
0.94117647 0.92561983 0.925 0.95762712]
|
|
|
|
mean value: 0.9398846847886094
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.23313832 0.23423362 0.23843575 0.33675814 0.22626853 0.1537931
|
|
0.36849594 0.25207734 0.25877261 0.22935581]
|
|
|
|
mean value: 0.25313291549682615
|
|
|
|
key: score_time
|
|
value: [0.02324677 0.02040601 0.0177052 0.0334866 0.01204157 0.02399635
|
|
0.02332544 0.02243543 0.02139521 0.02241635]
|
|
|
|
mean value: 0.022045493125915527
|
|
|
|
key: test_mcc
|
|
value: [0.9258201 0.69230769 0.85163063 0.83974359 0.92259985 0.85163063
|
|
0.92307692 1. 0.83974359 0.67948718]
|
|
|
|
mean value: 0.8526040174827166
|
|
|
|
key: train_mcc
|
|
value: [0.92212497 0.93897274 0.93922501 0.95627534 0.94845006 0.98252973
|
|
0.93924826 0.92248004 0.92161308 0.95688345]
|
|
|
|
mean value: 0.9427802688967852
|
|
|
|
key: test_accuracy
|
|
value: [0.96153846 0.84615385 0.92 0.92 0.96 0.92
|
|
0.96 1. 0.92 0.84 ]
|
|
|
|
mean value: 0.9247692307692308
|
|
|
|
key: train_accuracy
|
|
value: [0.96017699 0.96902655 0.969163 0.97797357 0.97356828 0.99118943
|
|
0.969163 0.96035242 0.96035242 0.97797357]
|
|
|
|
mean value: 0.9708939222642392
|
|
|
|
key: test_fscore
|
|
value: [0.96 0.84615385 0.92307692 0.91666667 0.95652174 0.92307692
|
|
0.96 1. 0.92307692 0.84615385]
|
|
|
|
mean value: 0.9254726867335563
|
|
|
|
key: train_fscore
|
|
value: [0.96137339 0.96969697 0.96995708 0.97835498 0.97435897 0.99130435
|
|
0.96969697 0.96137339 0.96103896 0.97835498]
|
|
|
|
mean value: 0.9715510041988863
|
|
|
|
key: test_precision
|
|
value: [1. 0.84615385 0.85714286 0.91666667 1. 0.85714286
|
|
1. 1. 0.92307692 0.84615385]
|
|
|
|
mean value: 0.9246336996336997
|
|
|
|
key: train_precision
|
|
value: [0.93333333 0.94915254 0.94957983 0.96581197 0.95 0.98275862
|
|
0.94915254 0.93333333 0.94067797 0.95762712]
|
|
|
|
mean value: 0.9511427254592586
|
|
|
|
key: test_recall
|
|
value: [0.92307692 0.84615385 1. 0.91666667 0.91666667 1.
|
|
0.92307692 1. 0.92307692 0.84615385]
|
|
|
|
mean value: 0.9294871794871795
|
|
|
|
key: train_recall
|
|
value: [0.99115044 0.99115044 0.99122807 0.99122807 1. 1.
|
|
0.99115044 0.99115044 0.98230088 1. ]
|
|
|
|
mean value: 0.9929358795218134
|
|
|
|
key: test_roc_auc
|
|
value: [0.96153846 0.84615385 0.92307692 0.91987179 0.95833333 0.92307692
|
|
0.96153846 1. 0.91987179 0.83974359]
|
|
|
|
mean value: 0.9253205128205129
|
|
|
|
key: train_roc_auc
|
|
value: [0.96017699 0.96902655 0.96906536 0.97791492 0.97345133 0.99115044
|
|
0.96925943 0.9604875 0.96044869 0.97807018]
|
|
|
|
mean value: 0.9709051389535787
|
|
|
|
key: test_jcc
|
|
value: [0.92307692 0.73333333 0.85714286 0.84615385 0.91666667 0.85714286
|
|
0.92307692 1. 0.85714286 0.73333333]
|
|
|
|
mean value: 0.8647069597069597
|
|
|
|
key: train_jcc
|
|
value: [0.92561983 0.94117647 0.94166667 0.95762712 0.95 0.98275862
|
|
0.94117647 0.92561983 0.925 0.95762712]
|
|
|
|
mean value: 0.9448272135242416
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03459048 0.03621244 0.03454232 0.03617811 0.03667927 0.03526711
|
|
0.03593946 0.03665805 0.03587699 0.03511906]
|
|
|
|
mean value: 0.03570632934570313
|
|
|
|
key: score_time
|
|
value: [0.01214147 0.01206398 0.01200223 0.0132072 0.01338696 0.0131669
|
|
0.01348519 0.01330256 0.01318336 0.01189971]
|
|
|
|
mean value: 0.012783956527709962
|
|
|
|
key: test_mcc
|
|
value: [0.92 0.84270097 0.92295821 0.84270097 0.73061343 0.88443328
|
|
0.715 0.76603235 0.71889189 0.75712849]
|
|
|
|
mean value: 0.8100459580758218
|
|
|
|
key: train_mcc
|
|
value: [0.86574363 0.87008467 0.88377996 0.87444183 0.90202478 0.87036287
|
|
0.87471218 0.89304742 0.8834738 0.90607263]
|
|
|
|
mean value: 0.8823743771558636
|
|
|
|
key: test_accuracy
|
|
value: [0.96 0.92 0.96 0.92 0.85714286 0.93877551
|
|
0.85714286 0.87755102 0.85714286 0.87755102]
|
|
|
|
mean value: 0.902530612244898
|
|
|
|
key: train_accuracy
|
|
value: [0.93243243 0.93468468 0.94144144 0.93693694 0.9505618 0.93483146
|
|
0.93707865 0.94606742 0.94157303 0.95280899]
|
|
|
|
mean value: 0.9408416843810102
|
|
|
|
key: test_fscore
|
|
value: [0.96 0.92307692 0.95833333 0.92307692 0.86792453 0.94117647
|
|
0.85714286 0.86956522 0.86792453 0.88461538]
|
|
|
|
mean value: 0.9052836165828734
|
|
|
|
key: train_fscore
|
|
value: [0.9339207 0.93598234 0.94273128 0.9380531 0.95175439 0.93626374
|
|
0.93832599 0.94713656 0.94222222 0.95343681]
|
|
|
|
mean value: 0.9419827126292131
|
|
|
|
key: test_precision
|
|
value: [0.96 0.88888889 1. 0.88888889 0.79310345 0.88888889
|
|
0.84 0.95238095 0.82142857 0.85185185]
|
|
|
|
mean value: 0.8885431490603904
|
|
|
|
key: train_precision
|
|
value: [0.9137931 0.91774892 0.92241379 0.92173913 0.93133047 0.91810345
|
|
0.92207792 0.92672414 0.92982456 0.93886463]
|
|
|
|
mean value: 0.9242620115347717
|
|
|
|
key: test_recall
|
|
value: [0.96 0.96 0.92 0.96 0.95833333 1.
|
|
0.875 0.8 0.92 0.92 ]
|
|
|
|
mean value: 0.9273333333333333
|
|
|
|
key: train_recall
|
|
value: [0.95495495 0.95495495 0.96396396 0.95495495 0.97309417 0.95515695
|
|
0.95515695 0.96846847 0.95495495 0.96846847]
|
|
|
|
mean value: 0.96041287924696
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 0.92 0.96 0.92 0.85916667 0.94
|
|
0.8575 0.87916667 0.85583333 0.87666667]
|
|
|
|
mean value: 0.9028333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.93243243 0.93468468 0.94144144 0.93693694 0.95051105 0.93478568
|
|
0.93703793 0.94611764 0.94160304 0.9528441 ]
|
|
|
|
mean value: 0.9408394942027228
|
|
|
|
key: test_jcc
|
|
value: [0.92307692 0.85714286 0.92 0.85714286 0.76666667 0.88888889
|
|
0.75 0.76923077 0.76666667 0.79310345]
|
|
|
|
mean value: 0.8291919077091491
|
|
|
|
key: train_jcc
|
|
value: [0.87603306 0.87966805 0.89166667 0.88333333 0.90794979 0.88016529
|
|
0.88381743 0.89958159 0.8907563 0.91101695]
|
|
|
|
mean value: 0.890398845671255
|
|
|
|
MCC on Blind test: 0.77
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.96315241 0.80819511 0.99288607 0.82086325 0.86465096 1.00852299
|
|
0.80899525 0.96896267 0.86382556 0.81577754]
|
|
|
|
mean value: 0.8915831804275512
|
|
|
|
key: score_time
|
|
value: [0.01347756 0.01319551 0.01491904 0.01334333 0.01338387 0.01394129
|
|
0.01341248 0.013376 0.01485038 0.01479459]
|
|
|
|
mean value: 0.013869404792785645
|
|
|
|
key: test_mcc
|
|
value: [0.96076892 0.96076892 0.88640526 0.92295821 0.87813144 1.
|
|
0.95993456 0.88443328 0.91833333 0.88388348]
|
|
|
|
mean value: 0.9255617400791709
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 0.99101119 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.999101119056276
|
|
|
|
key: test_accuracy
|
|
value: [0.98 0.98 0.94 0.96 0.93877551 1.
|
|
0.97959184 0.93877551 0.95918367 0.93877551]
|
|
|
|
mean value: 0.9615102040816327
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 0.99550562 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9995505617977528
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 0.98039216 0.93617021 0.95833333 0.93617021 1.
|
|
0.9787234 0.93617021 0.96 0.94339623]
|
|
|
|
mean value: 0.9608947595899058
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 0.9955157 1. 1.
|
|
1. 1. 1. ]
|
|
|
|
mean value: 0.9995515695067264
|
|
|
|
key: test_precision
|
|
value: [1. 0.96153846 1. 1. 0.95652174 1.
|
|
1. 1. 0.96 0.89285714]
|
|
|
|
mean value: 0.9770917343526039
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 0.9955157 1. 1.
|
|
1. 1. 1. ]
|
|
|
|
mean value: 0.9995515695067264
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 0.88 0.92 0.91666667 1.
|
|
0.95833333 0.88 0.96 1. ]
|
|
|
|
mean value: 0.9475
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 0.9955157 1. 1.
|
|
1. 1. 1. ]
|
|
|
|
mean value: 0.9995515695067264
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.98 0.94 0.96 0.93833333 1.
|
|
0.97916667 0.94 0.95916667 0.9375 ]
|
|
|
|
mean value: 0.9614166666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 0.9955056 1. 1.
|
|
1. 1. 1. ]
|
|
|
|
mean value: 0.999550559528138
|
|
|
|
key: test_jcc
|
|
value: [0.96 0.96153846 0.88 0.92 0.88 1.
|
|
0.95833333 0.88 0.92307692 0.89285714]
|
|
|
|
mean value: 0.9255805860805861
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 0.99107143 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9991071428571429
|
|
|
|
MCC on Blind test: 0.82
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01435113 0.01183224 0.01033449 0.00987959 0.0100081 0.00998855
|
|
0.01026988 0.01016665 0.00984001 0.00985885]
|
|
|
|
mean value: 0.010652947425842284
|
|
|
|
key: score_time
|
|
value: [0.01502943 0.00946522 0.00942898 0.00894809 0.00888109 0.00884604
|
|
0.00928593 0.00882649 0.00891709 0.00886297]
|
|
|
|
mean value: 0.009649133682250977
|
|
|
|
key: test_mcc
|
|
value: [0.84 0.60783067 0.56044854 0.65319726 0.36080239 0.39196475
|
|
0.34666667 0.755 0.55390031 0.6363961 ]
|
|
|
|
mean value: 0.5706206693679877
|
|
|
|
key: train_mcc
|
|
value: [0.58903785 0.62756029 0.58596618 0.60179763 0.61744414 0.58140024
|
|
0.56604953 0.62267888 0.66596271 0.63708908]
|
|
|
|
mean value: 0.6094986536415706
|
|
|
|
key: test_accuracy
|
|
value: [0.92 0.8 0.78 0.82 0.67346939 0.69387755
|
|
0.67346939 0.87755102 0.7755102 0.81632653]
|
|
|
|
mean value: 0.7830204081632653
|
|
|
|
key: train_accuracy
|
|
value: [0.79279279 0.81306306 0.79279279 0.79954955 0.80449438 0.78876404
|
|
0.77977528 0.80898876 0.83146067 0.81797753]
|
|
|
|
mean value: 0.8029658872355502
|
|
|
|
key: test_fscore
|
|
value: [0.92 0.81481481 0.7755102 0.83636364 0.7037037 0.70588235
|
|
0.66666667 0.88 0.79245283 0.83018868]
|
|
|
|
mean value: 0.7925582888005593
|
|
|
|
key: train_fscore
|
|
value: [0.8034188 0.81917211 0.78899083 0.80860215 0.81987578 0.80084746
|
|
0.79583333 0.81953291 0.83870968 0.82275711]
|
|
|
|
mean value: 0.8117740158013851
|
|
|
|
key: test_precision
|
|
value: [0.92 0.75862069 0.79166667 0.76666667 0.63333333 0.66666667
|
|
0.66666667 0.88 0.75 0.78571429]
|
|
|
|
mean value: 0.7619334975369458
|
|
|
|
key: train_precision
|
|
value: [0.76422764 0.79324895 0.80373832 0.77366255 0.76153846 0.75903614
|
|
0.74319066 0.7751004 0.80246914 0.8 ]
|
|
|
|
mean value: 0.777621226162571
|
|
|
|
key: test_recall
|
|
value: [0.92 0.88 0.76 0.92 0.79166667 0.75
|
|
0.66666667 0.88 0.84 0.88 ]
|
|
|
|
mean value: 0.8288333333333333
|
|
|
|
key: train_recall
|
|
value: [0.84684685 0.84684685 0.77477477 0.84684685 0.88789238 0.84753363
|
|
0.85650224 0.86936937 0.87837838 0.84684685]
|
|
|
|
mean value: 0.8501838161030986
|
|
|
|
key: test_roc_auc
|
|
value: [0.92 0.8 0.78 0.82 0.67583333 0.695
|
|
0.67333333 0.8775 0.77416667 0.815 ]
|
|
|
|
mean value: 0.7830833333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.79279279 0.81306306 0.79279279 0.79954955 0.80430655 0.78863168
|
|
0.77960247 0.80912415 0.83156587 0.81804226]
|
|
|
|
mean value: 0.8029471175211086
|
|
|
|
key: test_jcc
|
|
value: [0.85185185 0.6875 0.63333333 0.71875 0.54285714 0.54545455
|
|
0.5 0.78571429 0.65625 0.70967742]
|
|
|
|
mean value: 0.6631388578565998
|
|
|
|
key: train_jcc
|
|
value: [0.67142857 0.69372694 0.65151515 0.67870036 0.69473684 0.66784452
|
|
0.66089965 0.6942446 0.72222222 0.69888476]
|
|
|
|
mean value: 0.6834203625179707
|
|
|
|
MCC on Blind test: 0.51
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01007128 0.00996518 0.01020312 0.01033068 0.01145005 0.01092172
|
|
0.01150632 0.01143599 0.01150846 0.01151633]
|
|
|
|
mean value: 0.010890913009643555
|
|
|
|
key: score_time
|
|
value: [0.00888562 0.00889969 0.00882649 0.00925899 0.00962615 0.00970936
|
|
0.00966907 0.00965095 0.00971079 0.00965929]
|
|
|
|
mean value: 0.009389638900756836
|
|
|
|
key: test_mcc
|
|
value: [0.72057669 0.68 0.71774056 0.52678658 0.64755065 0.59166667
|
|
0.47140452 0.75793094 0.57236448 0.51089422]
|
|
|
|
mean value: 0.6196915318553365
|
|
|
|
key: train_mcc
|
|
value: [0.64907023 0.61717347 0.67117798 0.64875397 0.64501141 0.67705447
|
|
0.6675085 0.63145881 0.65423027 0.66744773]
|
|
|
|
mean value: 0.6528886843624365
|
|
|
|
key: test_accuracy
|
|
value: [0.86 0.84 0.84 0.76 0.81632653 0.79591837
|
|
0.73469388 0.87755102 0.7755102 0.75510204]
|
|
|
|
mean value: 0.8055102040816327
|
|
|
|
key: train_accuracy
|
|
value: [0.82432432 0.80855856 0.83558559 0.82432432 0.82247191 0.83820225
|
|
0.83370787 0.81573034 0.82696629 0.83370787]
|
|
|
|
mean value: 0.8263579309646726
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.84 0.80952381 0.77777778 0.83018868 0.79166667
|
|
0.71111111 0.875 0.80701754 0.76923077]
|
|
|
|
mean value: 0.8068659214557923
|
|
|
|
key: train_fscore
|
|
value: [0.82743363 0.80725624 0.83595506 0.82589286 0.82167043 0.84210526
|
|
0.83555556 0.81531532 0.82379863 0.83408072]
|
|
|
|
mean value: 0.8269063684882629
|
|
|
|
key: test_precision
|
|
value: [0.875 0.84 1. 0.72413793 0.75862069 0.79166667
|
|
0.76190476 0.91304348 0.71875 0.74074074]
|
|
|
|
mean value: 0.8123864268262694
|
|
|
|
key: train_precision
|
|
value: [0.81304348 0.81278539 0.83408072 0.81858407 0.82727273 0.82403433
|
|
0.82819383 0.81531532 0.8372093 0.83035714]
|
|
|
|
mean value: 0.8240876309807807
|
|
|
|
key: test_recall
|
|
value: [0.84 0.84 0.68 0.84 0.91666667 0.79166667
|
|
0.66666667 0.84 0.92 0.8 ]
|
|
|
|
mean value: 0.8135
|
|
|
|
key: train_recall
|
|
value: [0.84234234 0.8018018 0.83783784 0.83333333 0.8161435 0.86098655
|
|
0.84304933 0.81531532 0.81081081 0.83783784]
|
|
|
|
mean value: 0.8299458651476589
|
|
|
|
key: test_roc_auc
|
|
value: [0.86 0.84 0.84 0.76 0.81833333 0.79583333
|
|
0.73333333 0.87833333 0.7725 0.75416667]
|
|
|
|
mean value: 0.80525
|
|
|
|
key: train_roc_auc
|
|
value: [0.82432432 0.80855856 0.83558559 0.82432432 0.82248616 0.83815093
|
|
0.83368683 0.81572941 0.82693007 0.83371713]
|
|
|
|
mean value: 0.8263493313941744
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.72413793 0.68 0.63636364 0.70967742 0.65517241
|
|
0.55172414 0.77777778 0.67647059 0.625 ]
|
|
|
|
mean value: 0.6786323904490168
|
|
|
|
key: train_jcc
|
|
value: [0.70566038 0.67680608 0.71814672 0.70342205 0.69731801 0.72727273
|
|
0.71755725 0.68821293 0.70038911 0.71538462]
|
|
|
|
mean value: 0.7050169867430932
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00954771 0.01051784 0.01046944 0.01038027 0.01053214 0.01175332
|
|
0.01028919 0.01084447 0.00925541 0.01067114]
|
|
|
|
mean value: 0.010426092147827148
|
|
|
|
key: score_time
|
|
value: [0.01178885 0.01480341 0.01264715 0.0150733 0.01574039 0.01233459
|
|
0.01635766 0.01635599 0.0143652 0.01265836]
|
|
|
|
mean value: 0.014212489128112793
|
|
|
|
key: test_mcc
|
|
value: [0.64465837 0.64465837 0.48038446 0.36115756 0.47 0.43226548
|
|
0.14142136 0.69595532 0.43071846 0.34666667]
|
|
|
|
mean value: 0.4647886048397928
|
|
|
|
key: train_mcc
|
|
value: [0.65369056 0.68073982 0.69905687 0.64888568 0.66327176 0.63149177
|
|
0.69485439 0.64065884 0.67242906 0.66763672]
|
|
|
|
mean value: 0.665271547188797
|
|
|
|
key: test_accuracy
|
|
value: [0.82 0.82 0.74 0.68 0.73469388 0.71428571
|
|
0.57142857 0.83673469 0.71428571 0.67346939]
|
|
|
|
mean value: 0.7304897959183674
|
|
|
|
key: train_accuracy
|
|
value: [0.82657658 0.84009009 0.8490991 0.82432432 0.83146067 0.81573034
|
|
0.84719101 0.82022472 0.83595506 0.83370787]
|
|
|
|
mean value: 0.8324359753011439
|
|
|
|
key: test_fscore
|
|
value: [0.80851064 0.83018868 0.73469388 0.69230769 0.73469388 0.68181818
|
|
0.53333333 0.81818182 0.70833333 0.68 ]
|
|
|
|
mean value: 0.7222061431619555
|
|
|
|
key: train_fscore
|
|
value: [0.82298851 0.83678161 0.84526559 0.82191781 0.82915718 0.81531532
|
|
0.84474886 0.8173516 0.83218391 0.83105023]
|
|
|
|
mean value: 0.8296760595767689
|
|
|
|
key: test_precision
|
|
value: [0.86363636 0.78571429 0.75 0.66666667 0.72 0.75
|
|
0.57142857 0.94736842 0.73913043 0.68 ]
|
|
|
|
mean value: 0.7473944743281128
|
|
|
|
key: train_precision
|
|
value: [0.84037559 0.85446009 0.86729858 0.83333333 0.84259259 0.81900452
|
|
0.86046512 0.8287037 0.84976526 0.84259259]
|
|
|
|
mean value: 0.8438591380554358
|
|
|
|
key: test_recall
|
|
value: [0.76 0.88 0.72 0.72 0.75 0.625 0.5 0.72 0.68 0.68 ]
|
|
|
|
mean value: 0.7035
|
|
|
|
key: train_recall
|
|
value: [0.80630631 0.81981982 0.82432432 0.81081081 0.8161435 0.81165919
|
|
0.82959641 0.80630631 0.81531532 0.81981982]
|
|
|
|
mean value: 0.8160101805841716
|
|
|
|
key: test_roc_auc
|
|
value: [0.82 0.82 0.74 0.68 0.735 0.7125
|
|
0.57 0.83916667 0.715 0.67333333]
|
|
|
|
mean value: 0.7305
|
|
|
|
key: train_roc_auc
|
|
value: [0.82657658 0.84009009 0.8490991 0.82432432 0.83149517 0.81573951
|
|
0.84723064 0.82019351 0.83590878 0.83367673]
|
|
|
|
mean value: 0.8324334424110209
|
|
|
|
key: test_jcc
|
|
value: [0.67857143 0.70967742 0.58064516 0.52941176 0.58064516 0.51724138
|
|
0.36363636 0.69230769 0.5483871 0.51515152]
|
|
|
|
mean value: 0.5715674982392904
|
|
|
|
key: train_jcc
|
|
value: [0.69921875 0.71936759 0.732 0.69767442 0.70817121 0.68821293
|
|
0.7312253 0.69111969 0.71259843 0.7109375 ]
|
|
|
|
mean value: 0.7090525804279022
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02455401 0.01976275 0.02020121 0.01943421 0.01968145 0.02007937
|
|
0.01907325 0.01924157 0.01907706 0.02080584]
|
|
|
|
mean value: 0.020191073417663574
|
|
|
|
key: score_time
|
|
value: [0.01594257 0.01245522 0.01158261 0.01231074 0.01259041 0.01275015
|
|
0.01280093 0.01202369 0.01226616 0.01140428]
|
|
|
|
mean value: 0.012612676620483399
|
|
|
|
key: test_mcc
|
|
value: [0.84270097 0.81649658 0.76244374 0.76991885 0.63819901 0.88443328
|
|
0.63819901 0.83973406 0.69302938 0.6446564 ]
|
|
|
|
mean value: 0.7529811283975508
|
|
|
|
key: train_mcc
|
|
value: [0.81482171 0.80993817 0.83784204 0.79746663 0.84229483 0.82838049
|
|
0.82583735 0.83001583 0.81687801 0.83001583]
|
|
|
|
mean value: 0.8233490893695841
|
|
|
|
key: test_accuracy
|
|
value: [0.92 0.9 0.88 0.88 0.81632653 0.93877551
|
|
0.81632653 0.91836735 0.83673469 0.81632653]
|
|
|
|
mean value: 0.8722857142857143
|
|
|
|
key: train_accuracy
|
|
value: [0.90540541 0.90315315 0.91666667 0.8963964 0.91910112 0.91235955
|
|
0.91011236 0.91235955 0.90561798 0.91235955]
|
|
|
|
mean value: 0.9093531733981172
|
|
|
|
key: test_fscore
|
|
value: [0.92307692 0.90909091 0.88461538 0.88888889 0.82352941 0.94117647
|
|
0.82352941 0.91666667 0.85714286 0.83636364]
|
|
|
|
mean value: 0.8804080559962912
|
|
|
|
key: train_fscore
|
|
value: [0.90987124 0.90752688 0.92077088 0.9017094 0.92307692 0.91648822
|
|
0.91525424 0.91684435 0.9106383 0.91684435]
|
|
|
|
mean value: 0.9139024786305163
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.83333333 0.85185185 0.82758621 0.77777778 0.88888889
|
|
0.77777778 0.95652174 0.77419355 0.76666667]
|
|
|
|
mean value: 0.8343486679599268
|
|
|
|
key: train_precision
|
|
value: [0.86885246 0.86831276 0.87755102 0.85772358 0.88163265 0.87704918
|
|
0.86746988 0.87044534 0.86290323 0.87044534]
|
|
|
|
mean value: 0.8702385440834701
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 0.92 0.96 0.875 1. 0.875 0.88 0.96 0.92 ]
|
|
|
|
mean value: 0.935
|
|
|
|
key: train_recall
|
|
value: [0.95495495 0.95045045 0.96846847 0.95045045 0.96860987 0.95964126
|
|
0.96860987 0.96846847 0.96396396 0.96846847]
|
|
|
|
mean value: 0.962208621177231
|
|
|
|
key: test_roc_auc
|
|
value: [0.92 0.9 0.88 0.88 0.8175 0.94
|
|
0.8175 0.91916667 0.83416667 0.81416667]
|
|
|
|
mean value: 0.87225
|
|
|
|
key: train_roc_auc
|
|
value: [0.90540541 0.90315315 0.91666667 0.8963964 0.91898962 0.91225306
|
|
0.90998061 0.91248536 0.9057488 0.91248536]
|
|
|
|
mean value: 0.9093564416434372
|
|
|
|
key: test_jcc
|
|
value: [0.85714286 0.83333333 0.79310345 0.8 0.7 0.88888889
|
|
0.7 0.84615385 0.75 0.71875 ]
|
|
|
|
mean value: 0.7887372373794788
|
|
|
|
key: train_jcc
|
|
value: [0.83464567 0.83070866 0.8531746 0.82101167 0.85714286 0.8458498
|
|
0.84375 0.84645669 0.8359375 0.84645669]
|
|
|
|
mean value: 0.8415134152376186
|
|
|
|
MCC on Blind test: 0.75
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.03499746 2.0999229 1.86718345 2.35692716 2.00013876 1.91443801
|
|
1.94074488 1.80898762 1.93776011 1.79052377]
|
|
|
|
mean value: 1.975162410736084
|
|
|
|
key: score_time
|
|
value: [0.02622867 0.01369429 0.01386547 0.01393795 0.01472187 0.01529217
|
|
0.01370955 0.01532626 0.01748395 0.01541948]
|
|
|
|
mean value: 0.015967965126037598
|
|
|
|
key: test_mcc
|
|
value: [0.96076892 0.92 0.88640526 0.96076892 0.83920658 0.96
|
|
0.95993456 0.84852814 0.80235519 0.88388348]
|
|
|
|
mean value: 0.9021851044654765
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98 0.96 0.94 0.98 0.91836735 0.97959184
|
|
0.97959184 0.91836735 0.89795918 0.93877551]
|
|
|
|
mean value: 0.949265306122449
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 0.96 0.93617021 0.98039216 0.91304348 0.97959184
|
|
0.9787234 0.91304348 0.89361702 0.94339623]
|
|
|
|
mean value: 0.9477569651566838
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.96 1. 0.96153846 0.95454545 0.96
|
|
1. 1. 0.95454545 0.89285714]
|
|
|
|
mean value: 0.9683486513486513
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96 0.96 0.88 1. 0.875 1.
|
|
0.95833333 0.84 0.84 1. ]
|
|
|
|
mean value: 0.9313333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.96 0.94 0.98 0.9175 0.98
|
|
0.97916667 0.92 0.89916667 0.9375 ]
|
|
|
|
mean value: 0.9493333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96 0.92307692 0.88 0.96153846 0.84 0.96
|
|
0.95833333 0.84 0.80769231 0.89285714]
|
|
|
|
mean value: 0.9023498168498169
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02805066 0.02016377 0.01816463 0.01702118 0.01913452 0.01711273
|
|
0.0228157 0.0228436 0.02031279 0.01806664]
|
|
|
|
mean value: 0.020368623733520507
|
|
|
|
key: score_time
|
|
value: [0.0102489 0.00965166 0.0088439 0.00859785 0.00871611 0.01119018
|
|
0.01182079 0.0093441 0.00882077 0.00860953]
|
|
|
|
mean value: 0.009584379196166993
|
|
|
|
key: test_mcc
|
|
value: [0.76244374 1. 1. 0.96076892 0.87813144 1.
|
|
0.95993456 0.96 0.92153718 0.79632832]
|
|
|
|
mean value: 0.9239144158581891
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.88 1. 1. 0.98 0.93877551 1.
|
|
0.97959184 0.97959184 0.95918367 0.89795918]
|
|
|
|
mean value: 0.9615102040816327
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.875 1. 1. 0.97959184 0.93617021 1.
|
|
0.9787234 0.97959184 0.95833333 0.90196078]
|
|
|
|
mean value: 0.9609371408137724
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.91304348 1. 1. 1. 0.95652174 1.
|
|
1. 1. 1. 0.88461538]
|
|
|
|
mean value: 0.9754180602006689
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.84 1. 1. 0.96 0.91666667 1.
|
|
0.95833333 0.96 0.92 0.92 ]
|
|
|
|
mean value: 0.9475
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.88 1. 1. 0.98 0.93833333 1.
|
|
0.97916667 0.98 0.96 0.8975 ]
|
|
|
|
mean value: 0.9615
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.77777778 1. 1. 0.96 0.88 1.
|
|
0.95833333 0.96 0.92 0.82142857]
|
|
|
|
mean value: 0.9277539682539683
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.1131525 0.11387014 0.11412191 0.11642528 0.11563301 0.11530852
|
|
0.11376071 0.11487508 0.13210487 0.16346359]
|
|
|
|
mean value: 0.12127156257629394
|
|
|
|
key: score_time
|
|
value: [0.01732659 0.01757503 0.01776457 0.01776791 0.01772571 0.01758981
|
|
0.01755643 0.01780224 0.02342415 0.02385879]
|
|
|
|
mean value: 0.018839120864868164
|
|
|
|
key: test_mcc
|
|
value: [0.96076892 0.88640526 0.92 0.96076892 0.91833333 0.96
|
|
0.91833333 0.96 0.83666667 0.81223286]
|
|
|
|
mean value: 0.9133509301489711
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98 0.94 0.96 0.98 0.95918367 0.97959184
|
|
0.95918367 0.97959184 0.91836735 0.89795918]
|
|
|
|
mean value: 0.9553877551020408
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98039216 0.94339623 0.96 0.98039216 0.95833333 0.97959184
|
|
0.95833333 0.97959184 0.92 0.90909091]
|
|
|
|
mean value: 0.9569121789367548
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96153846 0.89285714 0.96 0.96153846 0.95833333 0.96
|
|
0.95833333 1. 0.92 0.83333333]
|
|
|
|
mean value: 0.9405934065934066
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.96 1. 0.95833333 1.
|
|
0.95833333 0.96 0.92 1. ]
|
|
|
|
mean value: 0.9756666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.94 0.96 0.98 0.95916667 0.98
|
|
0.95916667 0.98 0.91833333 0.89583333]
|
|
|
|
mean value: 0.9552499999999999
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96153846 0.89285714 0.92307692 0.96153846 0.92 0.96
|
|
0.92 0.96 0.85185185 0.83333333]
|
|
|
|
mean value: 0.9184196174196174
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02605653 0.020082 0.01426554 0.01442385 0.01441646 0.01451516
|
|
0.01466107 0.01469254 0.0145874 0.01441717]
|
|
|
|
mean value: 0.016211771965026857
|
|
|
|
key: score_time
|
|
value: [0.02344012 0.01235223 0.01225805 0.01217318 0.01216507 0.01230502
|
|
0.01227427 0.01253939 0.01223445 0.01682353]
|
|
|
|
mean value: 0.01385653018951416
|
|
|
|
key: test_mcc
|
|
value: [0.76991885 0.73484692 0.52678658 0.84270097 0.68145382 0.92128466
|
|
0.52366061 0.57723644 0.49255205 0.48142509]
|
|
|
|
mean value: 0.6551866008925364
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.88 0.86 0.76 0.92 0.83673469 0.95918367
|
|
0.75510204 0.7755102 0.73469388 0.73469388]
|
|
|
|
mean value: 0.8215918367346939
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.84444444 0.73913043 0.91666667 0.81818182 0.95652174
|
|
0.71428571 0.74418605 0.69767442 0.71111111]
|
|
|
|
mean value: 0.8011767611110382
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.95238095 0.95 0.80952381 0.95652174 0.9 1.
|
|
0.83333333 0.88888889 0.83333333 0.8 ]
|
|
|
|
mean value: 0.8923982056590752
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 0.76 0.68 0.88 0.75 0.91666667
|
|
0.625 0.64 0.6 0.64 ]
|
|
|
|
mean value: 0.7291666666666666
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.88 0.86 0.76 0.92 0.835 0.95833333
|
|
0.7525 0.77833333 0.7375 0.73666667]
|
|
|
|
mean value: 0.8218333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.73076923 0.5862069 0.84615385 0.69230769 0.91666667
|
|
0.55555556 0.59259259 0.53571429 0.55172414]
|
|
|
|
mean value: 0.6776921673473397
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.69676876 2.80759549 2.68800616 2.32241178 2.43454385 2.91730642
|
|
2.76242924 2.54836512 1.70775008 1.78542304]
|
|
|
|
mean value: 2.4670599937438964
|
|
|
|
key: score_time
|
|
value: [0.13430142 0.12456036 0.1438818 0.10578561 0.14634299 0.13477516
|
|
0.12637496 0.09941649 0.10094881 0.09945416]
|
|
|
|
mean value: 0.12158417701721191
|
|
|
|
key: test_mcc
|
|
value: [0.92 0.96076892 1. 0.96076892 0.91833333 0.96
|
|
0.95993456 0.96 0.87833333 0.88388348]
|
|
|
|
mean value: 0.9402022548546277
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96 0.98 1. 0.98 0.95918367 0.97959184
|
|
0.97959184 0.97959184 0.93877551 0.93877551]
|
|
|
|
mean value: 0.9695510204081632
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96 0.98039216 1. 0.98039216 0.95833333 0.97959184
|
|
0.9787234 0.97959184 0.93877551 0.94339623]
|
|
|
|
mean value: 0.9699196461402706
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96 0.96153846 1. 0.96153846 0.95833333 0.96
|
|
1. 1. 0.95833333 0.89285714]
|
|
|
|
mean value: 0.9652600732600732
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 1. 1. 0.95833333 1.
|
|
0.95833333 0.96 0.92 1. ]
|
|
|
|
mean value: 0.9756666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 0.98 1. 0.98 0.95916667 0.98
|
|
0.97916667 0.98 0.93916667 0.9375 ]
|
|
|
|
mean value: 0.9695
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
key: test_jcc
|
|
value: [0.92307692 0.96153846 1. 0.96153846 0.92 0.96
|
|
0.95833333 0.96 0.88461538 0.89285714]
|
|
|
|
mean value: 0.9421959706959707
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.9
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.91654634 0.98295927 0.96927714 0.94499826 0.99785066 0.96956062
|
|
0.96395802 1.06285167 0.92931247 1.43423033]
|
|
|
|
mean value: 1.0171544790267943
|
|
|
|
key: score_time
|
|
value: [0.20817733 0.20496678 0.17890882 0.1978817 0.17763162 0.20473719
|
|
0.2104454 0.18232894 0.16462111 0.15226412]
|
|
|
|
mean value: 0.1881963014602661
|
|
|
|
key: test_mcc
|
|
value: [0.96076892 0.96076892 1. 0.96076892 0.87833333 0.92153718
|
|
0.79666667 0.91833333 0.83666667 0.88388348]
|
|
|
|
mean value: 0.9117727422320718
|
|
|
|
key: train_mcc
|
|
value: [0.97772549 0.97756674 0.97772549 0.98214142 0.99105105 0.97338596
|
|
0.96901669 0.97319193 0.98218183 0.98652689]
|
|
|
|
mean value: 0.9790513480628695
|
|
|
|
key: test_accuracy
|
|
value: [0.98 0.98 1. 0.98 0.93877551 0.95918367
|
|
0.89795918 0.95918367 0.91836735 0.93877551]
|
|
|
|
mean value: 0.9552244897959183
|
|
|
|
key: train_accuracy
|
|
value: [0.98873874 0.98873874 0.98873874 0.99099099 0.99550562 0.98651685
|
|
0.98426966 0.98651685 0.99101124 0.99325843]
|
|
|
|
mean value: 0.9894285858892601
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 0.98039216 1. 0.98039216 0.93877551 0.96
|
|
0.89795918 0.96 0.92 0.94339623]
|
|
|
|
mean value: 0.956050707075283
|
|
|
|
key: train_fscore
|
|
value: [0.98886414 0.98881432 0.98886414 0.99107143 0.99553571 0.98672566
|
|
0.98454746 0.98660714 0.99107143 0.99325843]
|
|
|
|
mean value: 0.9895359869088803
|
|
|
|
key: test_precision
|
|
value: [1. 0.96153846 1. 0.96153846 0.92 0.92307692
|
|
0.88 0.96 0.92 0.89285714]
|
|
|
|
mean value: 0.941901098901099
|
|
|
|
key: train_precision
|
|
value: [0.97797357 0.98222222 0.97797357 0.98230088 0.99111111 0.97379913
|
|
0.96956522 0.97787611 0.98230088 0.99103139]
|
|
|
|
mean value: 0.9806154080166792
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 1. 1. 0.95833333 1.
|
|
0.91666667 0.96 0.92 1. ]
|
|
|
|
mean value: 0.9715
|
|
|
|
key: train_recall
|
|
value: [1. 0.9954955 1. 1. 1. 1. 1.
|
|
0.9954955 1. 0.9954955]
|
|
|
|
mean value: 0.9986486486486487
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.98 1. 0.98 0.93916667 0.96
|
|
0.89833333 0.95916667 0.91833333 0.9375 ]
|
|
|
|
mean value: 0.95525
|
|
|
|
key: train_roc_auc
|
|
value: [0.98873874 0.98873874 0.98873874 0.99099099 0.9954955 0.98648649
|
|
0.98423423 0.98653699 0.99103139 0.99326344]
|
|
|
|
mean value: 0.9894255241788874
|
|
|
|
key: test_jcc
|
|
value: [0.96 0.96153846 1. 0.96153846 0.88461538 0.92307692
|
|
0.81481481 0.92307692 0.85185185 0.89285714]
|
|
|
|
mean value: 0.9173369963369964
|
|
|
|
key: train_jcc
|
|
value: [0.97797357 0.97787611 0.97797357 0.98230088 0.99111111 0.97379913
|
|
0.96956522 0.97356828 0.98230088 0.98660714]
|
|
|
|
mean value: 0.9793075892605511
|
|
|
|
MCC on Blind test: 0.93
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02435184 0.01141787 0.01019692 0.01031661 0.01042128 0.01012516
|
|
0.01014805 0.0099566 0.01006889 0.01015043]
|
|
|
|
mean value: 0.011715364456176759
|
|
|
|
key: score_time
|
|
value: [0.01093507 0.00964713 0.00901341 0.00880551 0.00908613 0.00880623
|
|
0.00902629 0.00888085 0.00878191 0.00887132]
|
|
|
|
mean value: 0.009185385704040528
|
|
|
|
key: test_mcc
|
|
value: [0.72057669 0.68 0.71774056 0.52678658 0.64755065 0.59166667
|
|
0.47140452 0.75793094 0.57236448 0.51089422]
|
|
|
|
mean value: 0.6196915318553365
|
|
|
|
key: train_mcc
|
|
value: [0.64907023 0.61717347 0.67117798 0.64875397 0.64501141 0.67705447
|
|
0.6675085 0.63145881 0.65423027 0.66744773]
|
|
|
|
mean value: 0.6528886843624365
|
|
|
|
key: test_accuracy
|
|
value: [0.86 0.84 0.84 0.76 0.81632653 0.79591837
|
|
0.73469388 0.87755102 0.7755102 0.75510204]
|
|
|
|
mean value: 0.8055102040816327
|
|
|
|
key: train_accuracy
|
|
value: [0.82432432 0.80855856 0.83558559 0.82432432 0.82247191 0.83820225
|
|
0.83370787 0.81573034 0.82696629 0.83370787]
|
|
|
|
mean value: 0.8263579309646726
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.84 0.80952381 0.77777778 0.83018868 0.79166667
|
|
0.71111111 0.875 0.80701754 0.76923077]
|
|
|
|
mean value: 0.8068659214557923
|
|
|
|
key: train_fscore
|
|
value: [0.82743363 0.80725624 0.83595506 0.82589286 0.82167043 0.84210526
|
|
0.83555556 0.81531532 0.82379863 0.83408072]
|
|
|
|
mean value: 0.8269063684882629
|
|
|
|
key: test_precision
|
|
value: [0.875 0.84 1. 0.72413793 0.75862069 0.79166667
|
|
0.76190476 0.91304348 0.71875 0.74074074]
|
|
|
|
mean value: 0.8123864268262694
|
|
|
|
key: train_precision
|
|
value: [0.81304348 0.81278539 0.83408072 0.81858407 0.82727273 0.82403433
|
|
0.82819383 0.81531532 0.8372093 0.83035714]
|
|
|
|
mean value: 0.8240876309807807
|
|
|
|
key: test_recall
|
|
value: [0.84 0.84 0.68 0.84 0.91666667 0.79166667
|
|
0.66666667 0.84 0.92 0.8 ]
|
|
|
|
mean value: 0.8135
|
|
|
|
key: train_recall
|
|
value: [0.84234234 0.8018018 0.83783784 0.83333333 0.8161435 0.86098655
|
|
0.84304933 0.81531532 0.81081081 0.83783784]
|
|
|
|
mean value: 0.8299458651476589
|
|
|
|
key: test_roc_auc
|
|
value: [0.86 0.84 0.84 0.76 0.81833333 0.79583333
|
|
0.73333333 0.87833333 0.7725 0.75416667]
|
|
|
|
mean value: 0.80525
|
|
|
|
key: train_roc_auc
|
|
value: [0.82432432 0.80855856 0.83558559 0.82432432 0.82248616 0.83815093
|
|
0.83368683 0.81572941 0.82693007 0.83371713]
|
|
|
|
mean value: 0.8263493313941744
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.72413793 0.68 0.63636364 0.70967742 0.65517241
|
|
0.55172414 0.77777778 0.67647059 0.625 ]
|
|
|
|
mean value: 0.6786323904490168
|
|
|
|
key: train_jcc
|
|
value: [0.70566038 0.67680608 0.71814672 0.70342205 0.69731801 0.72727273
|
|
0.71755725 0.68821293 0.70038911 0.71538462]
|
|
|
|
mean value: 0.7050169867430932
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.79472208 0.80350232 0.45731807 1.06822419 0.24828315 0.91175652
|
|
0.44438171 1.13784504 0.13120103 0.62630701]
|
|
|
|
mean value: 0.6623541116714478
|
|
|
|
key: score_time
|
|
value: [0.01206136 0.01817369 0.01223946 0.01325536 0.01292014 0.01739454
|
|
0.0146122 0.01222372 0.01143074 0.02074003]
|
|
|
|
mean value: 0.014505124092102051
|
|
|
|
key: test_mcc
|
|
value: [0.92295821 1. 1. 0.92 0.96 0.96
|
|
0.95993456 1. 0.96 0.88388348]
|
|
|
|
mean value: 0.9566776243209462
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96 1. 1. 0.96 0.97959184 0.97959184
|
|
0.97959184 1. 0.97959184 0.93877551]
|
|
|
|
mean value: 0.9777142857142858
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95833333 1. 1. 0.96 0.97959184 0.97959184
|
|
0.9787234 1. 0.97959184 0.94339623]
|
|
|
|
mean value: 0.9779228474207828
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 0.96 0.96 0.96
|
|
1. 1. 1. 0.89285714]
|
|
|
|
mean value: 0.9772857142857143
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.92 1. 1. 0.96 1. 1.
|
|
0.95833333 1. 0.96 1. ]
|
|
|
|
mean value: 0.9798333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 1. 1. 0.96 0.98 0.98
|
|
0.97916667 1. 0.98 0.9375 ]
|
|
|
|
mean value: 0.9776666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.92 1. 1. 0.92307692 0.96 0.96
|
|
0.95833333 1. 0.96 0.89285714]
|
|
|
|
mean value: 0.95742673992674
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.93
|
|
|
|
Accuracy on Blind test: 0.97
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.06679082 0.08117557 0.06612992 0.07416844 0.07512784 0.09100699
|
|
0.05638266 0.10009074 0.13629198 0.46553612]
|
|
|
|
mean value: 0.12127010822296143
|
|
|
|
key: score_time
|
|
value: [0.02018023 0.01259112 0.02434754 0.01950455 0.02425027 0.0123632
|
|
0.01235318 0.022475 0.0144074 0.0375824 ]
|
|
|
|
mean value: 0.02000548839569092
|
|
|
|
key: test_mcc
|
|
value: [0.92295821 0.92295821 0.96076892 0.92295821 0.87813144 0.91833333
|
|
0.83666667 0.88443328 0.83973406 0.75712849]
|
|
|
|
mean value: 0.8844070807800867
|
|
|
|
key: train_mcc
|
|
value: [0.97748739 0.96847829 0.96847829 0.97301246 0.97753762 0.96862627
|
|
0.96408299 0.97303357 0.97761752 0.97761617]
|
|
|
|
mean value: 0.9725970581582458
|
|
|
|
key: test_accuracy
|
|
value: [0.96 0.96 0.98 0.96 0.93877551 0.95918367
|
|
0.91836735 0.93877551 0.91836735 0.87755102]
|
|
|
|
mean value: 0.9411020408163265
|
|
|
|
key: train_accuracy
|
|
value: [0.98873874 0.98423423 0.98423423 0.98648649 0.98876404 0.98426966
|
|
0.98202247 0.98651685 0.98876404 0.98876404]
|
|
|
|
mean value: 0.98627948172892
|
|
|
|
key: test_fscore
|
|
value: [0.95833333 0.95833333 0.97959184 0.95833333 0.93617021 0.95833333
|
|
0.91666667 0.93617021 0.91666667 0.88461538]
|
|
|
|
mean value: 0.940321431354866
|
|
|
|
key: train_fscore
|
|
value: [0.98876404 0.98426966 0.98426966 0.98654709 0.98881432 0.9844098
|
|
0.98214286 0.98648649 0.98881432 0.98866213]
|
|
|
|
mean value: 0.986318036603825
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 0.95652174 0.95833333
|
|
0.91666667 1. 0.95652174 0.85185185]
|
|
|
|
mean value: 0.9639895330112721
|
|
|
|
key: train_precision
|
|
value: [0.98654709 0.98206278 0.98206278 0.98214286 0.98660714 0.97787611
|
|
0.97777778 0.98648649 0.98222222 0.99543379]
|
|
|
|
mean value: 0.9839219028375424
|
|
|
|
key: test_recall
|
|
value: [0.92 0.92 0.96 0.92 0.91666667 0.95833333
|
|
0.91666667 0.88 0.88 0.92 ]
|
|
|
|
mean value: 0.9191666666666667
|
|
|
|
key: train_recall
|
|
value: [0.99099099 0.98648649 0.98648649 0.99099099 0.99103139 0.99103139
|
|
0.98654709 0.98648649 0.9954955 0.98198198]
|
|
|
|
mean value: 0.9887528784389771
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 0.96 0.98 0.96 0.93833333 0.95916667
|
|
0.91833333 0.94 0.91916667 0.87666667]
|
|
|
|
mean value: 0.9411666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [0.98873874 0.98423423 0.98423423 0.98648649 0.98875894 0.98425443
|
|
0.98201228 0.98651679 0.98877914 0.98874884]
|
|
|
|
mean value: 0.9862764109400881
|
|
|
|
key: test_jcc
|
|
value: [0.92 0.92 0.96 0.92 0.88 0.92
|
|
0.84615385 0.88 0.84615385 0.79310345]
|
|
|
|
mean value: 0.8885411140583555
|
|
|
|
key: train_jcc
|
|
value: [0.97777778 0.96902655 0.96902655 0.97345133 0.97787611 0.96929825
|
|
0.96491228 0.97333333 0.97787611 0.97757848]
|
|
|
|
mean value: 0.9730156749931365
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02351522 0.01143217 0.01135421 0.00991988 0.01008391 0.00996041
|
|
0.01298809 0.01356626 0.01061201 0.01017332]
|
|
|
|
mean value: 0.012360548973083496
|
|
|
|
key: score_time
|
|
value: [0.01136422 0.01014304 0.00985026 0.00899959 0.00900364 0.0087328
|
|
0.01209545 0.00974679 0.00935864 0.00874829]
|
|
|
|
mean value: 0.009804272651672363
|
|
|
|
key: test_mcc
|
|
value: [0.88070485 0.68887476 0.68 0.54886043 0.42107141 0.63819901
|
|
0.30550961 0.715 0.6363961 0.715 ]
|
|
|
|
mean value: 0.6229616178094004
|
|
|
|
key: train_mcc
|
|
value: [0.63271384 0.69507721 0.66367887 0.67666495 0.65968449 0.63233469
|
|
0.63872848 0.67310111 0.71811613 0.7041106 ]
|
|
|
|
mean value: 0.6694210366789762
|
|
|
|
key: test_accuracy
|
|
value: [0.94 0.84 0.84 0.76 0.69387755 0.81632653
|
|
0.65306122 0.85714286 0.81632653 0.85714286]
|
|
|
|
mean value: 0.8073877551020407
|
|
|
|
key: train_accuracy
|
|
value: [0.81531532 0.84684685 0.83108108 0.83783784 0.82921348 0.81573034
|
|
0.81797753 0.83595506 0.85842697 0.85168539]
|
|
|
|
mean value: 0.8340069845126025
|
|
|
|
key: test_fscore
|
|
value: [0.93877551 0.85185185 0.84 0.79310345 0.73684211 0.82352941
|
|
0.63829787 0.85714286 0.83018868 0.85714286]
|
|
|
|
mean value: 0.8166874593231083
|
|
|
|
key: train_fscore
|
|
value: [0.82251082 0.85152838 0.83660131 0.84210526 0.83478261 0.8209607
|
|
0.82655246 0.84026258 0.86214442 0.85462555]
|
|
|
|
mean value: 0.8392074099899087
|
|
|
|
key: test_precision
|
|
value: [0.95833333 0.79310345 0.84 0.6969697 0.63636364 0.77777778
|
|
0.65217391 0.875 0.78571429 0.875 ]
|
|
|
|
mean value: 0.789043609147807
|
|
|
|
key: train_precision
|
|
value: [0.79166667 0.82627119 0.81012658 0.82051282 0.81012658 0.8
|
|
0.79098361 0.81702128 0.83829787 0.8362069 ]
|
|
|
|
mean value: 0.8141213490222399
|
|
|
|
key: test_recall
|
|
value: [0.92 0.92 0.84 0.92 0.875 0.875 0.625 0.84 0.88 0.84 ]
|
|
|
|
mean value: 0.8535
|
|
|
|
key: train_recall
|
|
value: [0.85585586 0.87837838 0.86486486 0.86486486 0.86098655 0.84304933
|
|
0.86547085 0.86486486 0.88738739 0.87387387]
|
|
|
|
mean value: 0.8659596816547489
|
|
|
|
key: test_roc_auc
|
|
value: [0.94 0.84 0.84 0.76 0.6975 0.8175 0.6525 0.8575 0.815 0.8575]
|
|
|
|
mean value: 0.80775
|
|
|
|
key: train_roc_auc
|
|
value: [0.81531532 0.84684685 0.83108108 0.83783784 0.82914192 0.81566881
|
|
0.81787056 0.83601988 0.8584919 0.85173514]
|
|
|
|
mean value: 0.8340009291803013
|
|
|
|
key: test_jcc
|
|
value: [0.88461538 0.74193548 0.72413793 0.65714286 0.58333333 0.7
|
|
0.46875 0.75 0.70967742 0.75 ]
|
|
|
|
mean value: 0.6969592409351865
|
|
|
|
key: train_jcc
|
|
value: [0.69852941 0.74144487 0.71910112 0.72727273 0.71641791 0.6962963
|
|
0.70437956 0.7245283 0.75769231 0.74615385]
|
|
|
|
mean value: 0.723181635407389
|
|
|
|
MCC on Blind test: 0.7
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01996326 0.02930212 0.02290964 0.02392697 0.02542186 0.02594995
|
|
0.02480555 0.02159262 0.0215888 0.02682495]
|
|
|
|
mean value: 0.024228572845458984
|
|
|
|
key: score_time
|
|
value: [0.0103476 0.01182961 0.01219368 0.02413225 0.01205063 0.01199412
|
|
0.01199555 0.01192403 0.01194859 0.01230097]
|
|
|
|
mean value: 0.013071703910827636
|
|
|
|
key: test_mcc
|
|
value: [0.96076892 0.88640526 0.88640526 0.81649658 0.83666667 0.96
|
|
0.91833333 0.80235519 0.75793094 0.88388348]
|
|
|
|
mean value: 0.87092456296181
|
|
|
|
key: train_mcc
|
|
value: [0.96847829 0.96895027 0.95946919 0.89306822 0.97307234 0.96408444
|
|
0.96862627 0.96854863 0.97307343 0.99101119]
|
|
|
|
mean value: 0.9628382277807876
|
|
|
|
key: test_accuracy
|
|
value: [0.98 0.94 0.94 0.9 0.91836735 0.97959184
|
|
0.95918367 0.89795918 0.87755102 0.93877551]
|
|
|
|
mean value: 0.9331428571428572
|
|
|
|
key: train_accuracy
|
|
value: [0.98423423 0.98423423 0.97972973 0.94369369 0.98651685 0.98202247
|
|
0.98426966 0.98426966 0.98651685 0.99550562]
|
|
|
|
mean value: 0.9810993015487398
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 0.94339623 0.93617021 0.88888889 0.91666667 0.97959184
|
|
0.95833333 0.89361702 0.875 0.94339623]
|
|
|
|
mean value: 0.9314652249231019
|
|
|
|
key: train_fscore
|
|
value: [0.98419865 0.98447894 0.97968397 0.94033413 0.98660714 0.98198198
|
|
0.9844098 0.98419865 0.98654709 0.9954955 ]
|
|
|
|
mean value: 0.9807935833776061
|
|
|
|
key: test_precision
|
|
value: [1. 0.89285714 1. 1. 0.91666667 0.96
|
|
0.95833333 0.95454545 0.91304348 0.89285714]
|
|
|
|
mean value: 0.948830321852061
|
|
|
|
key: train_precision
|
|
value: [0.98642534 0.96943231 0.98190045 1. 0.98222222 0.98642534
|
|
0.97787611 0.98642534 0.98214286 0.9954955 ]
|
|
|
|
mean value: 0.984834546605398
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 0.88 0.8 0.91666667 1.
|
|
0.95833333 0.84 0.84 1. ]
|
|
|
|
mean value: 0.9195
|
|
|
|
key: train_recall
|
|
value: [0.98198198 1. 0.97747748 0.88738739 0.99103139 0.97757848
|
|
0.99103139 0.98198198 0.99099099 0.9954955 ]
|
|
|
|
mean value: 0.9774956570920696
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.94 0.94 0.9 0.91833333 0.98
|
|
0.95916667 0.89916667 0.87833333 0.9375 ]
|
|
|
|
mean value: 0.9332499999999999
|
|
|
|
key: train_roc_auc
|
|
value: [0.98423423 0.98423423 0.97972973 0.94369369 0.98650669 0.98203248
|
|
0.98425443 0.98426453 0.98652689 0.9955056 ]
|
|
|
|
mean value: 0.9810982507170848
|
|
|
|
key: test_jcc
|
|
value: [0.96 0.89285714 0.88 0.8 0.84615385 0.96
|
|
0.92 0.80769231 0.77777778 0.89285714]
|
|
|
|
mean value: 0.8737338217338217
|
|
|
|
key: train_jcc
|
|
value: [0.96888889 0.96943231 0.96017699 0.88738739 0.97356828 0.96460177
|
|
0.96929825 0.96888889 0.97345133 0.99103139]
|
|
|
|
mean value: 0.9626725485758111
|
|
|
|
MCC on Blind test: 0.81
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01990843 0.02046919 0.02080297 0.01966143 0.02220058 0.02803659
|
|
0.02483511 0.02362347 0.02931428 0.02548862]
|
|
|
|
mean value: 0.02343406677246094
|
|
|
|
key: score_time
|
|
value: [0.01227689 0.01180506 0.01192141 0.01200867 0.01209712 0.01524425
|
|
0.01525784 0.01590824 0.01786828 0.01726747]
|
|
|
|
mean value: 0.014165520668029785
|
|
|
|
key: test_mcc
|
|
value: [0.96076892 0.92 0.85096294 0.88640526 0.71889189 0.91833333
|
|
0.87813144 0.77771377 0.80235519 0.60104076]
|
|
|
|
mean value: 0.8314603519165027
|
|
|
|
key: train_mcc
|
|
value: [0.97297297 0.92553856 0.94305976 0.89065536 0.9513445 0.95963881
|
|
0.94622223 0.70789906 0.97303357 0.82718662]
|
|
|
|
mean value: 0.9097551433791422
|
|
|
|
key: test_accuracy
|
|
value: [0.98 0.96 0.92 0.94 0.85714286 0.95918367
|
|
0.93877551 0.87755102 0.89795918 0.79591837]
|
|
|
|
mean value: 0.9126530612244897
|
|
|
|
key: train_accuracy
|
|
value: [0.98648649 0.96171171 0.97072072 0.94369369 0.9752809 0.97977528
|
|
0.97303371 0.83370787 0.98651685 0.90786517]
|
|
|
|
mean value: 0.9518792387893511
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 0.96 0.91304348 0.93617021 0.84444444 0.95833333
|
|
0.93617021 0.89285714 0.89361702 0.7826087 ]
|
|
|
|
mean value: 0.9096836378091169
|
|
|
|
key: train_fscore
|
|
value: [0.98648649 0.96037296 0.96983759 0.94117647 0.97482838 0.97968397
|
|
0.97285068 0.85714286 0.98648649 0.8992629 ]
|
|
|
|
mean value: 0.9528128774277923
|
|
|
|
key: test_precision
|
|
value: [1. 0.96 1. 1. 0.9047619 0.95833333
|
|
0.95652174 0.80645161 0.95454545 0.85714286]
|
|
|
|
mean value: 0.939775690181721
|
|
|
|
key: train_precision
|
|
value: [0.98648649 0.99516908 1. 0.98522167 0.9953271 0.98636364
|
|
0.98173516 0.75 0.98648649 0.98918919]
|
|
|
|
mean value: 0.965597881814934
|
|
|
|
key: test_recall
|
|
value: [0.96 0.96 0.84 0.88 0.79166667 0.95833333
|
|
0.91666667 1. 0.84 0.72 ]
|
|
|
|
mean value: 0.8866666666666666
|
|
|
|
key: train_recall
|
|
value: [0.98648649 0.92792793 0.94144144 0.9009009 0.95515695 0.97309417
|
|
0.96412556 1. 0.98648649 0.82432432]
|
|
|
|
mean value: 0.9459944249181917
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.96 0.92 0.94 0.85583333 0.95916667
|
|
0.93833333 0.875 0.89916667 0.7975 ]
|
|
|
|
mean value: 0.9125
|
|
|
|
key: train_roc_auc
|
|
value: [0.98648649 0.96171171 0.97072072 0.94369369 0.97532622 0.97979033
|
|
0.97305377 0.83408072 0.98651679 0.90767786]
|
|
|
|
mean value: 0.9519058295964126
|
|
|
|
key: test_jcc
|
|
value: [0.96 0.92307692 0.84 0.88 0.73076923 0.92
|
|
0.88 0.80645161 0.80769231 0.64285714]
|
|
|
|
mean value: 0.839084721729883
|
|
|
|
key: train_jcc
|
|
value: [0.97333333 0.92376682 0.94144144 0.88888889 0.95089286 0.96017699
|
|
0.94713656 0.75 0.97333333 0.81696429]
|
|
|
|
mean value: 0.9125934511024733
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.2129302 0.19134593 0.18476772 0.19252348 0.17929888 0.17443395
|
|
0.16943169 0.17664385 0.19328618 0.19234943]
|
|
|
|
mean value: 0.1867011308670044
|
|
|
|
key: score_time
|
|
value: [0.01836157 0.01695943 0.01809049 0.01672387 0.0186944 0.01590395
|
|
0.01539612 0.01729679 0.01770878 0.01800537]
|
|
|
|
mean value: 0.01731407642364502
|
|
|
|
key: test_mcc
|
|
value: [0.92295821 1. 1. 0.96076892 0.91833333 0.96
|
|
0.95993456 0.96 0.96 0.88388348]
|
|
|
|
mean value: 0.9525878499373318
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96 1. 1. 0.98 0.95918367 0.97959184
|
|
0.97959184 0.97959184 0.97959184 0.93877551]
|
|
|
|
mean value: 0.9756326530612245
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95833333 1. 1. 0.98039216 0.95833333 0.97959184
|
|
0.9787234 0.97959184 0.97959184 0.94339623]
|
|
|
|
mean value: 0.9757953964403907
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 0.96153846 0.95833333 0.96
|
|
1. 1. 1. 0.89285714]
|
|
|
|
mean value: 0.9772728937728938
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.92 1. 1. 1. 0.95833333 1.
|
|
0.95833333 0.96 0.96 1. ]
|
|
|
|
mean value: 0.9756666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 1. 1. 0.98 0.95916667 0.98
|
|
0.97916667 0.98 0.98 0.9375 ]
|
|
|
|
mean value: 0.9755833333333334
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.92 1. 1. 0.96153846 0.92 0.96
|
|
0.95833333 0.96 0.96 0.89285714]
|
|
|
|
mean value: 0.9532728937728938
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05467558 0.05334568 0.04526019 0.05006552 0.05799127 0.05810881
|
|
0.04847312 0.05014277 0.04765916 0.04914188]
|
|
|
|
mean value: 0.05148639678955078
|
|
|
|
key: score_time
|
|
value: [0.03140426 0.02127337 0.02089715 0.02047062 0.02144003 0.02220535
|
|
0.02192521 0.02027392 0.02155662 0.02414703]
|
|
|
|
mean value: 0.022559356689453126
|
|
|
|
key: test_mcc
|
|
value: [0.84 1. 0.96076892 0.96076892 0.96 1.
|
|
0.95993456 0.92153718 0.92153718 0.88388348]
|
|
|
|
mean value: 0.9408430236571541
|
|
|
|
key: train_mcc
|
|
value: [0.99099099 0.98657657 1. 0.99099099 0.9955157 0.99101119
|
|
0.9955157 0.99105105 0.99105105 0.98652661]
|
|
|
|
mean value: 0.9919229847279497
|
|
|
|
key: test_accuracy
|
|
value: [0.92 1. 0.98 0.98 0.97959184 1.
|
|
0.97959184 0.95918367 0.95918367 0.93877551]
|
|
|
|
mean value: 0.9696326530612245
|
|
|
|
key: train_accuracy
|
|
value: [0.9954955 0.99324324 1. 0.9954955 0.99775281 0.99550562
|
|
0.99775281 0.99550562 0.99550562 0.99325843]
|
|
|
|
mean value: 0.9959515133110639
|
|
|
|
key: test_fscore
|
|
value: [0.92 1. 0.97959184 0.97959184 0.97959184 1.
|
|
0.9787234 0.95833333 0.95833333 0.94339623]
|
|
|
|
mean value: 0.9697561807541162
|
|
|
|
key: train_fscore
|
|
value: [0.9954955 0.99319728 1. 0.9954955 0.99775281 0.9955157
|
|
0.99775281 0.99547511 0.99547511 0.99322799]
|
|
|
|
mean value: 0.9959387800162347
|
|
|
|
key: test_precision
|
|
value: [0.92 1. 1. 1. 0.96 1.
|
|
1. 1. 1. 0.89285714]
|
|
|
|
mean value: 0.9772857142857143
|
|
|
|
key: train_precision
|
|
value: [0.9954955 1. 1. 0.9954955 1. 0.9955157
|
|
1. 1. 1. 0.99547511]
|
|
|
|
mean value: 0.9981981799180427
|
|
|
|
key: test_recall
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[0.92 1. 0.96 0.96 1. 1.
|
|
0.95833333 0.92 0.92 1. ]
|
|
|
|
mean value: 0.9638333333333333
|
|
|
|
key: train_recall
|
|
value: [0.9954955 0.98648649 1. 0.9954955 0.9955157 0.9955157
|
|
0.9955157 0.99099099 0.99099099 0.99099099]
|
|
|
|
mean value: 0.9936997535652244
|
|
|
|
key: test_roc_auc
|
|
value: [0.92 1. 0.98 0.98 0.98 1.
|
|
0.97916667 0.96 0.96 0.9375 ]
|
|
|
|
mean value: 0.9696666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.9954955 0.99324324 1. 0.9954955 0.99775785 0.9955056
|
|
0.99775785 0.9954955 0.9954955 0.99325334]
|
|
|
|
mean value: 0.9959499858602998
|
|
|
|
key: test_jcc
|
|
value: [0.85185185 1. 0.96 0.96 0.96 1.
|
|
0.95833333 0.92 0.92 0.89285714]
|
|
|
|
mean value: 0.9423042328042328
|
|
|
|
key: train_jcc
|
|
value: [0.99103139 0.98648649 1. 0.99103139 0.9955157 0.99107143
|
|
0.9955157 0.99099099 0.99099099 0.98654709]
|
|
|
|
mean value: 0.9919181152645278
|
|
|
|
MCC on Blind test: 0.91
|
|
|
|
Accuracy on Blind test: 0.96
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.24292994 0.30352616 0.18884683 0.20338106 0.1789639 0.1880517
|
|
0.19062209 0.25489998 0.31271124 0.29555941]
|
|
|
|
mean value: 0.2359492301940918
|
|
|
|
key: score_time
|
|
value: [0.02458429 0.04391623 0.0244441 0.03348184 0.04872298 0.03391719
|
|
0.02556372 0.03823185 0.06575251 0.02959728]
|
|
|
|
mean value: 0.03682119846343994
|
|
|
|
key: test_mcc
|
|
value: [0.76244374 0.84270097 0.64051262 0.84270097 0.75712849 0.68145382
|
|
0.57236448 0.88443328 0.68145382 0.63333333]
|
|
|
|
mean value: 0.7298525519511979
|
|
|
|
key: train_mcc
|
|
value: [0.98214142 0.98198198 0.99103121 0.98202183 0.98652661 0.98652661
|
|
0.97761752 0.99105105 0.98206242 1. ]
|
|
|
|
mean value: 0.9860960660454435
|
|
|
|
key: test_accuracy
|
|
value: [0.88 0.92 0.82 0.92 0.87755102 0.83673469
|
|
0.7755102 0.93877551 0.83673469 0.81632653]
|
|
|
|
mean value: 0.8621632653061224
|
|
|
|
key: train_accuracy
|
|
value: [0.99099099 0.99099099 0.9954955 0.99099099 0.99325843 0.99325843
|
|
0.98876404 0.99550562 0.99101124 1. ]
|
|
|
|
mean value: 0.9930266221277457
|
|
|
|
key: test_fscore
|
|
value: [0.875 0.92307692 0.82352941 0.92307692 0.86956522 0.81818182
|
|
0.73170732 0.93617021 0.85185185 0.81632653]
|
|
|
|
mean value: 0.85684862057949
|
|
|
|
key: train_fscore
|
|
value: [0.99090909 0.99099099 0.99547511 0.99103139 0.99328859 0.99328859
|
|
0.98871332 0.99547511 0.99103139 1. ]
|
|
|
|
mean value: 0.9930203587905961
|
|
|
|
key: test_precision
|
|
value: [0.91304348 0.88888889 0.80769231 0.88888889 0.90909091 0.9
|
|
0.88235294 1. 0.79310345 0.83333333]
|
|
|
|
mean value: 0.881639419560753
|
|
|
|
key: train_precision
|
|
value: [1. 0.99099099 1. 0.98660714 0.99107143 0.99107143
|
|
0.99545455 1. 0.98660714 1. ]
|
|
|
|
mean value: 0.9941802679302679
|
|
|
|
key: test_recall
|
|
value: [0.84 0.96 0.84 0.96 0.83333333 0.75
|
|
0.625 0.88 0.92 0.8 ]
|
|
|
|
mean value: 0.8408333333333333
|
|
|
|
key: train_recall
|
|
value: [0.98198198 0.99099099 0.99099099 0.9954955 0.9955157 0.9955157
|
|
0.98206278 0.99099099 0.9954955 1. ]
|
|
|
|
mean value: 0.9919040116349533
|
|
|
|
key: test_roc_auc
|
|
value: [0.88 0.92 0.82 0.92 0.87666667 0.835
|
|
0.7725 0.94 0.835 0.81666667]
|
|
|
|
mean value: 0.8615833333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.99099099 0.99099099 0.9954955 0.99099099 0.99325334 0.99325334
|
|
0.98877914 0.9954955 0.99102129 1. ]
|
|
|
|
mean value: 0.9930271078253141
|
|
|
|
key: test_jcc
|
|
value: [0.77777778 0.85714286 0.7 0.85714286 0.76923077 0.69230769
|
|
0.57692308 0.88 0.74193548 0.68965517]
|
|
|
|
mean value: 0.7542115686809792
|
|
|
|
key: train_jcc
|
|
value: [0.98198198 0.98214286 0.99099099 0.98222222 0.98666667 0.98666667
|
|
0.97767857 0.99099099 0.98222222 1. ]
|
|
|
|
mean value: 0.986156317031317
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.72777963 0.73440146 0.72864008 0.73297572 0.72255707 0.73133516
|
|
0.71890521 0.73295641 0.69569707 0.72709918]
|
|
|
|
mean value: 0.7252346992492675
|
|
|
|
key: score_time
|
|
value: [0.01107264 0.01113224 0.0108335 0.01081014 0.0110662 0.01080418
|
|
0.01082015 0.01080537 0.01141047 0.01085973]
|
|
|
|
mean value: 0.010961461067199706
|
|
|
|
key: test_mcc
|
|
value: [0.84 1. 1. 1. 0.91833333 1.
|
|
0.95993456 0.96 0.96 0.88388348]
|
|
|
|
mean value: 0.9522151369551898
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92 1. 1. 1. 0.95918367 1.
|
|
0.97959184 0.97959184 0.97959184 0.93877551]
|
|
|
|
mean value: 0.9756734693877551
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.92 1. 1. 1. 0.95833333 1.
|
|
0.9787234 0.97959184 0.97959184 0.94339623]
|
|
|
|
mean value: 0.9759636637473135
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.92 1. 1. 1. 0.95833333 1.
|
|
1. 1. 1. 0.89285714]
|
|
|
|
mean value: 0.9771190476190477
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.92 1. 1. 1. 0.95833333 1.
|
|
0.95833333 0.96 0.96 1. ]
|
|
|
|
mean value: 0.9756666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92 1. 1. 1. 0.95916667 1.
|
|
0.97916667 0.98 0.98 0.9375 ]
|
|
|
|
mean value: 0.9755833333333334
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.85185185 1. 1. 1. 0.92 1.
|
|
0.95833333 0.96 0.96 0.89285714]
|
|
|
|
mean value: 0.9543042328042328
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.88
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.06196046 0.10875964 0.1522305 0.09208226 0.10462809 0.07834649
|
|
0.10365725 0.13421226 0.08688784 0.09222364]
|
|
|
|
mean value: 0.10149884223937988
|
|
|
|
key: score_time
|
|
value: [0.0132103 0.01405787 0.01433253 0.01217747 0.01229 0.01412511
|
|
0.01401472 0.01908922 0.01991558 0.02431083]
|
|
|
|
mean value: 0.015752363204956054
|
|
|
|
key: test_mcc
|
|
value: [0.84 0.88070485 0.57154761 0.88070485 0.7202771 0.79632832
|
|
0.7202771 0.7145252 0.59297231 0.43071846]
|
|
|
|
mean value: 0.7148055799157661
|
|
|
|
key: train_mcc
|
|
value: [0.94305976 0.90043405 0.99103121 0.88900089 0.93334181 0.91144903
|
|
0.977776 0.97318977 0.96439334 0.94317675]
|
|
|
|
mean value: 0.9426852605440293
|
|
|
|
key: test_accuracy
|
|
value: [0.92 0.94 0.78 0.94 0.85714286 0.89795918
|
|
0.85714286 0.85714286 0.79591837 0.71428571]
|
|
|
|
mean value: 0.8559591836734693
|
|
|
|
key: train_accuracy
|
|
value: [0.97072072 0.9481982 0.9954955 0.94144144 0.96629213 0.95505618
|
|
0.98876404 0.98651685 0.98202247 0.97078652]
|
|
|
|
mean value: 0.9705294058103047
|
|
|
|
key: test_fscore
|
|
value: [0.92 0.93877551 0.8 0.94117647 0.8627451 0.89361702
|
|
0.8627451 0.8627451 0.80769231 0.70833333]
|
|
|
|
mean value: 0.85978299372122
|
|
|
|
key: train_fscore
|
|
value: [0.96983759 0.94562648 0.99547511 0.93779904 0.96703297 0.95391705
|
|
0.98866213 0.98636364 0.98173516 0.96983759]
|
|
|
|
mean value: 0.9696286753164138
|
|
|
|
key: test_precision
|
|
value: [0.92 0.95833333 0.73333333 0.92307692 0.81481481 0.91304348
|
|
0.81481481 0.84615385 0.77777778 0.73913043]
|
|
|
|
mean value: 0.8440478756348322
|
|
|
|
key: train_precision
|
|
value: [1. 0.99502488 1. 1. 0.94827586 0.98104265
|
|
1. 0.99541284 0.99537037 1. ]
|
|
|
|
mean value: 0.991512660612636
|
|
|
|
key: test_recall
|
|
value: [0.92 0.92 0.88 0.96 0.91666667 0.875
|
|
0.91666667 0.88 0.84 0.68 ]
|
|
|
|
mean value: 0.8788333333333334
|
|
|
|
key: train_recall
|
|
value: [0.94144144 0.9009009 0.99099099 0.88288288 0.98654709 0.92825112
|
|
0.97757848 0.97747748 0.96846847 0.94144144]
|
|
|
|
mean value: 0.9495980285217953
|
|
|
|
key: test_roc_auc
|
|
value: [0.92 0.94 0.78 0.94 0.85833333 0.8975
|
|
0.85833333 0.85666667 0.795 0.715 ]
|
|
|
|
mean value: 0.8560833333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.97072072 0.9481982 0.9954955 0.94144144 0.96624652 0.95511655
|
|
0.98878924 0.98649659 0.98199208 0.97072072]
|
|
|
|
mean value: 0.9705217549387953
|
|
|
|
key: test_jcc
|
|
value: [0.85185185 0.88461538 0.66666667 0.88888889 0.75862069 0.80769231
|
|
0.75862069 0.75862069 0.67741935 0.5483871 ]
|
|
|
|
mean value: 0.760138362029352
|
|
|
|
key: train_jcc
|
|
value: [0.94144144 0.89686099 0.99099099 0.88288288 0.93617021 0.91189427
|
|
0.97757848 0.97309417 0.96412556 0.94144144]
|
|
|
|
mean value: 0.9416480435475579
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0152452 0.02876616 0.01938677 0.02208495 0.0235014 0.0355444
|
|
0.05327106 0.05544758 0.03134871 0.05830264]
|
|
|
|
mean value: 0.034289884567260745
|
|
|
|
key: score_time
|
|
value: [0.04853225 0.03252292 0.01783013 0.01610827 0.05006671 0.01594114
|
|
0.01828265 0.01959538 0.02210021 0.020504 ]
|
|
|
|
mean value: 0.026148366928100585
|
|
|
|
key: test_mcc
|
|
value: [0.96076892 1. 0.96076892 1. 0.79666667 0.92153718
|
|
0.83973406 0.87833333 0.79632832 0.84757938]
|
|
|
|
mean value: 0.9001716782603759
|
|
|
|
key: train_mcc
|
|
value: [0.95954708 0.96412048 0.95954708 0.9597029 0.96419915 0.95079918
|
|
0.95963637 0.95979475 0.95103575 0.95963881]
|
|
|
|
mean value: 0.9588021544847689
|
|
|
|
key: test_accuracy
|
|
value: [0.98 1. 0.98 1. 0.89795918 0.95918367
|
|
0.91836735 0.93877551 0.89795918 0.91836735]
|
|
|
|
mean value: 0.9490612244897959
|
|
|
|
key: train_accuracy
|
|
value: [0.97972973 0.98198198 0.97972973 0.97972973 0.98202247 0.9752809
|
|
0.97977528 0.97977528 0.9752809 0.97977528]
|
|
|
|
mean value: 0.9793081283530721
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 1. 0.97959184 1. 0.89795918 0.96
|
|
0.92 0.93877551 0.90196078 0.92592593]
|
|
|
|
mean value: 0.950380507758659
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_8020.py:188: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_8020.py:191: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.97986577 0.98214286 0.97986577 0.97995546 0.98222222 0.97560976
|
|
0.97995546 0.97995546 0.97560976 0.97986577]
|
|
|
|
mean value: 0.979504827670691
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 0.88 0.92307692
|
|
0.88461538 0.95833333 0.88461538 0.86206897]
|
|
|
|
mean value: 0.9392709991158267
|
|
|
|
key: train_precision
|
|
value: [0.97333333 0.97345133 0.97333333 0.969163 0.97356828 0.96491228
|
|
0.97345133 0.969163 0.96069869 0.97333333]
|
|
|
|
mean value: 0.9704407898653096
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 0.96 1. 0.91666667 1.
|
|
0.95833333 0.92 0.92 1. ]
|
|
|
|
mean value: 0.9635
|
|
|
|
key: train_recall
|
|
value: [0.98648649 0.99099099 0.98648649 0.99099099 0.99103139 0.98654709
|
|
0.98654709 0.99099099 0.99099099 0.98648649]
|
|
|
|
mean value: 0.988754898396154
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 1. 0.98 1. 0.89833333 0.96
|
|
0.91916667 0.93916667 0.8975 0.91666667]
|
|
|
|
mean value: 0.9490833333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.97972973 0.98198198 0.97972973 0.97972973 0.98200218 0.97525552
|
|
0.97976003 0.97980043 0.97531612 0.97979033]
|
|
|
|
mean value: 0.9793095786369329
|
|
|
|
key: test_jcc
|
|
value: [0.96 1. 0.96 1. 0.81481481 0.92307692
|
|
0.85185185 0.88461538 0.82142857 0.86206897]
|
|
|
|
mean value: 0.9077856511304787
|
|
|
|
key: train_jcc
|
|
value: [0.96052632 0.96491228 0.96052632 0.96069869 0.9650655 0.95238095
|
|
0.96069869 0.96069869 0.95238095 0.96052632]
|
|
|
|
mean value: 0.9598414704884481
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.45373392 0.50935388 0.52400017 0.63479996 0.42246246 0.4566958
|
|
0.50956869 0.57510614 0.48433805 0.48746967]
|
|
|
|
mean value: 0.5057528734207153
|
|
|
|
key: score_time
|
|
value: [0.01919889 0.02593637 0.01549244 0.02574992 0.03332043 0.03430772
|
|
0.03150678 0.0288868 0.03122258 0.02288985]
|
|
|
|
mean value: 0.026851177215576172
|
|
|
|
key: test_mcc
|
|
value: [0.96076892 1. 1. 1. 0.79666667 0.92153718
|
|
0.83973406 0.87833333 0.87833333 0.75712849]
|
|
|
|
mean value: 0.9032501979347523
|
|
|
|
key: train_mcc
|
|
value: [0.95954708 0.96412048 0.96847829 0.9597029 0.96419915 0.95079918
|
|
0.95963637 0.95979475 0.97307343 0.9820617 ]
|
|
|
|
mean value: 0.9641413319140149
|
|
|
|
key: test_accuracy
|
|
value: [0.98 1. 1. 1. 0.89795918 0.95918367
|
|
0.91836735 0.93877551 0.93877551 0.87755102]
|
|
|
|
mean value: 0.9510612244897959
|
|
|
|
key: train_accuracy
|
|
value: [0.97972973 0.98198198 0.98423423 0.97972973 0.98202247 0.9752809
|
|
0.97977528 0.97977528 0.98651685 0.99101124]
|
|
|
|
mean value: 0.9820057698147586
|
|
|
|
key: test_fscore
|
|
value: [0.97959184 1. 1. 1. 0.89795918 0.96
|
|
0.92 0.93877551 0.93877551 0.88461538]
|
|
|
|
mean value: 0.9519717425431711
|
|
|
|
key: train_fscore
|
|
value: [0.97986577 0.98214286 0.98426966 0.97995546 0.98222222 0.97560976
|
|
0.97995546 0.97995546 0.98654709 0.99095023]
|
|
|
|
mean value: 0.9821473951352674
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 0.88 0.92307692
|
|
0.88461538 0.95833333 0.95833333 0.85185185]
|
|
|
|
mean value: 0.9456210826210827
|
|
|
|
key: train_precision
|
|
value: [0.97333333 0.97345133 0.98206278 0.969163 0.97356828 0.96491228
|
|
0.97345133 0.969163 0.98214286 0.99545455]
|
|
|
|
mean value: 0.9756702724896559
|
|
|
|
key: test_recall
|
|
value: [0.96 1. 1. 1. 0.91666667 1.
|
|
0.95833333 0.92 0.92 0.92 ]
|
|
|
|
mean value: 0.9595
|
|
|
|
key: train_recall
|
|
value: [0.98648649 0.99099099 0.98648649 0.99099099 0.99103139 0.98654709
|
|
0.98654709 0.99099099 0.99099099 0.98648649]
|
|
|
|
mean value: 0.988754898396154
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 1. 1. 1. 0.89833333 0.96
|
|
0.91916667 0.93916667 0.93916667 0.87666667]
|
|
|
|
mean value: 0.95125
|
|
|
|
key: train_roc_auc
|
|
value: [0.97972973 0.98198198 0.98423423 0.97972973 0.98200218 0.97525552
|
|
0.97976003 0.97980043 0.98652689 0.99100109]
|
|
|
|
mean value: 0.9820021815537511
|
|
|
|
key: test_jcc
|
|
value: [0.96 1. 1. 1. 0.81481481 0.92307692
|
|
0.85185185 0.88461538 0.88461538 0.79310345]
|
|
|
|
mean value: 0.9112077807250221
|
|
|
|
key: train_jcc
|
|
value: [0.96052632 0.96491228 0.96902655 0.96069869 0.9650655 0.95238095
|
|
0.96069869 0.96069869 0.97345133 0.98206278]
|
|
|
|
mean value: 0.9649521777299835
|
|
|
|
MCC on Blind test: 0.84
|
|
|
|
Accuracy on Blind test: 0.93
|