19624 lines
985 KiB
Text
19624 lines
985 KiB
Text
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_cd_sl.py:548: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 531
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 531
|
|
ncols: 286
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 263
|
|
log10_or_mychisq 263
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 167
|
|
No. of categorical features: 7
|
|
|
|
PASS: x_features has no target variable
|
|
|
|
No. of columns for x_features: 174
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data with stratification according to scaling law [COMPLETE data]: 1/sqrt(x_ncols)
|
|
Input features data size: (531, 174)
|
|
Train data size: (490, 174)
|
|
Test data size: (41, 174)
|
|
y_train numbers: Counter({0: 448, 1: 42})
|
|
y_train ratio: 10.666666666666666
|
|
|
|
y_test_numbers: Counter({0: 37, 1: 4})
|
|
y_test ratio: 9.25
|
|
-------------------------------------------------------------
|
|
|
|
index: 0
|
|
ind: 1
|
|
|
|
Mask count check: True
|
|
|
|
index: 1
|
|
ind: 2
|
|
|
|
Mask count check: True
|
|
Original Data
|
|
Counter({0: 448, 1: 42}) Data dim: (490, 174)
|
|
|
|
Simple Random OverSampling
|
|
Counter({0: 448, 1: 448})
|
|
(896, 174)
|
|
|
|
Simple Random UnderSampling
|
|
Counter({0: 42, 1: 42})
|
|
(84, 174)
|
|
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 448, 1: 448})
|
|
(896, 174)
|
|
|
|
SMOTE_NC OverSampling
|
|
Counter({0: 448, 1: 448})
|
|
(896, 174)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis [COMPLETE DATA]: 70/30 split
|
|
Gene name: gid
|
|
Drug name: streptomycin
|
|
|
|
Output directory: /home/tanu/git/Data/streptomycin/output/ml/tts_cd_sl/
|
|
|
|
Sanity checks:
|
|
Total input features: 174
|
|
|
|
Training data size: (490, 174)
|
|
Test data size: (41, 174)
|
|
|
|
Target feature numbers (training data): Counter({0: 448, 1: 42})
|
|
Target features ratio (training data: 10.666666666666666
|
|
|
|
Target feature numbers (test data): Counter({0: 37, 1: 4})
|
|
Target features ratio (test data): 9.25
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 35
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'mcsm_na_affinity']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
These are:
|
|
['ALTS910101', 'AZAE970101', 'AZAE970102', 'BASU010101', 'BENS940101', 'BENS940102', 'BENS940103', 'BENS940104', 'BETM990101', 'BLAJ010101', 'BONM030101', 'BONM030102', 'BONM030103', 'BONM030104', 'BONM030105', 'BONM030106', 'BRYS930101', 'CROG050101', 'CSEM940101', 'DAYM780301', 'DAYM780302', 'DOSZ010101', 'DOSZ010102', 'DOSZ010103', 'DOSZ010104', 'FEND850101', 'FITW660101', 'GEOD900101', 'GIAG010101', 'GONG920101', 'GRAR740104', 'HENS920101', 'HENS920102', 'HENS920103', 'HENS920104', 'JOHM930101', 'JOND920103', 'JOND940101', 'KANM000101', 'KAPO950101', 'KESO980101', 'KESO980102', 'KOLA920101', 'KOLA930101', 'KOSJ950100_RSA_SST', 'KOSJ950100_SST', 'KOSJ950110_RSA', 'KOSJ950115', 'LEVJ860101', 'LINK010101', 'LIWA970101', 'LUTR910101', 'LUTR910102', 'LUTR910103', 'LUTR910104', 'LUTR910105', 'LUTR910106', 'LUTR910107', 'LUTR910108', 'LUTR910109', 'MCLA710101', 'MCLA720101', 'MEHP950102', 'MICC010101', 'MIRL960101', 'MIYS850102', 'MIYS850103', 'MIYS930101', 'MIYS960101', 'MIYS960102', 'MIYS960103', 'MIYS990106', 'MIYS990107', 'MIYT790101', 'MOHR870101', 'MOOG990101', 'MUET010101', 'MUET020101', 'MUET020102', 'NAOD960101', 'NGPC000101', 'NIEK910101', 'NIEK910102', 'OGAK980101', 'OVEJ920100_RSA', 'OVEJ920101', 'OVEJ920102', 'OVEJ920103', 'PRLA000101', 'PRLA000102', 'QUIB020101', 'QU_C930101', 'QU_C930102', 'QU_C930103', 'RIER950101', 'RISJ880101', 'RUSR970101', 'RUSR970102', 'RUSR970103', 'SIMK990101', 'SIMK990102', 'SIMK990103', 'SIMK990104', 'SIMK990105', 'SKOJ000101', 'SKOJ000102', 'SKOJ970101', 'TANS760101', 'TANS760102', 'THOP960101', 'TOBD000101', 'TOBD000102', 'TUDE900101', 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106']
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07848811 0.09707093 0.08534694 0.10038543 0.0732553 0.13863468
|
|
0.12368608 0.10416126 0.12571001 0.13071203]
|
|
|
|
mean value: 0.10574507713317871
|
|
|
|
key: score_time
|
|
value: [0.02824497 0.02965403 0.02464962 0.02455568 0.02345347 0.02584147
|
|
0.03807449 0.01823997 0.02558208 0.02534676]
|
|
|
|
mean value: 0.02636425495147705
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0. 0. 0. 0. -0.04303315
|
|
0. 0.48412292 0.61193797 0. ]
|
|
|
|
mean value: 0.10530277448697566
|
|
|
|
key: train_mcc
|
|
value: [0.4135851 0.44265133 0.34893035 0.34874058 0.38246502 0.38246502
|
|
0.38246502 0.34874058 0.22303476 0.3880796 ]
|
|
|
|
mean value: 0.36611573579567036
|
|
|
|
key: test_accuracy
|
|
value: [0.91836735 0.91836735 0.91836735 0.91836735 0.91836735 0.89795918
|
|
0.91836735 0.93877551 0.93877551 0.89795918]
|
|
|
|
mean value: 0.9183673469387755
|
|
|
|
key: train_accuracy
|
|
value: [0.92970522 0.93197279 0.92517007 0.92517007 0.92743764 0.92743764
|
|
0.92743764 0.92517007 0.92063492 0.92970522]
|
|
|
|
mean value: 0.926984126984127
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0. 0.
|
|
0. 0.4 0.57142857 0. ]
|
|
|
|
mean value: 0.09714285714285716
|
|
|
|
key: train_fscore
|
|
value: [0.31111111 0.34782609 0.26666667 0.23255814 0.27272727 0.27272727
|
|
0.27272727 0.23255814 0.1025641 0.27906977]
|
|
|
|
mean value: 0.2590535831991848
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 0. 0. 0. 1. 1. 0.]
|
|
|
|
mean value: 0.2
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.85714286 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9857142857142858
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0.25 0.4 0. ]
|
|
|
|
mean value: 0.065
|
|
|
|
key: train_recall
|
|
value: [0.18421053 0.21052632 0.15789474 0.13157895 0.15789474 0.15789474
|
|
0.15789474 0.13157895 0.05405405 0.16216216]
|
|
|
|
mean value: 0.15056899004267424
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.5 0.5 0.5 0.48888889
|
|
0.5 0.625 0.7 0.5 ]
|
|
|
|
mean value: 0.5313888888888889
|
|
|
|
key: train_roc_auc
|
|
value: [0.59210526 0.60526316 0.57770667 0.56578947 0.57894737 0.57894737
|
|
0.57894737 0.56578947 0.52702703 0.58108108]
|
|
|
|
mean value: 0.575160425542429
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0.25 0.4 0. ]
|
|
|
|
mean value: 0.065
|
|
|
|
key: train_jcc
|
|
value: [0.18421053 0.21052632 0.15384615 0.13157895 0.15789474 0.15789474
|
|
0.15789474 0.13157895 0.05405405 0.16216216]
|
|
|
|
mean value: 0.1501641317430791
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.66909504 2.31479025 1.85108137 1.64077187 1.77799916 1.65302491
|
|
1.41625834 1.55754209 1.42636704 1.68590975]
|
|
|
|
mean value: 1.699283981323242
|
|
|
|
key: score_time
|
|
value: [0.02239442 0.02576494 0.02784419 0.02527285 0.02750134 0.02870727
|
|
0.04380941 0.02288938 0.01310778 0.01921678]
|
|
|
|
mean value: 0.025650835037231444
|
|
|
|
key: test_mcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_mcc
|
|
value: [0. 0.15525099 0. 0. 0. 0.
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: 0.015525099295462535
|
|
|
|
key: test_accuracy
|
|
value: [0.91836735 0.91836735 0.91836735 0.91836735 0.91836735 0.91836735
|
|
0.91836735 0.91836735 0.89795918 0.89795918]
|
|
|
|
mean value: 0.9142857142857143
|
|
|
|
key: train_accuracy
|
|
value: [0.9138322 0.91609977 0.9138322 0.9138322 0.9138322 0.9138322
|
|
0.9138322 0.9138322 0.91609977 0.91609977]
|
|
|
|
mean value: 0.9145124716553288
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_fscore
|
|
value: [0. 0.05128205 0. 0. 0. 0.
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: 0.005128205128205127
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_precision
|
|
value: [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.1
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_recall
|
|
value: [0. 0.02631579 0. 0. 0. 0.
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: 0.002631578947368421
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
|
|
|
|
mean value: 0.5
|
|
|
|
key: train_roc_auc
|
|
value: [0.5 0.51315789 0.5 0.5 0.5 0.5
|
|
0.5 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.5013157894736842
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_jcc
|
|
value: [0. 0.02631579 0. 0. 0. 0.
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: 0.002631578947368421
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01454687 0.01091623 0.0102005 0.01039338 0.01027322 0.01078677
|
|
0.01085019 0.01015925 0.01122999 0.01087046]
|
|
|
|
mean value: 0.011022686958312988
|
|
|
|
key: score_time
|
|
value: [0.01222062 0.00927377 0.00906038 0.00921416 0.00915408 0.00969195
|
|
0.00974178 0.00889254 0.00963974 0.00964284]
|
|
|
|
mean value: 0.009653186798095703
|
|
|
|
key: test_mcc
|
|
value: [ 0.22718473 0.0725729 0.18856181 -0.06868296 0.09594259 0.02357023
|
|
-0.03058233 -0.01828792 0.23472626 -0.11677484]
|
|
|
|
mean value: 0.06082304720471863
|
|
|
|
key: train_mcc
|
|
value: [0.13215529 0.16648212 0.11794533 0.11406417 0.15142707 0.13087082
|
|
0.15518038 0.17026971 0.13383185 0.16031378]
|
|
|
|
mean value: 0.14325405064449043
|
|
|
|
key: test_accuracy
|
|
value: [0.44897959 0.40816327 0.36734694 0.3877551 0.44897959 0.32653061
|
|
0.44897959 0.46938776 0.42857143 0.40816327]
|
|
|
|
mean value: 0.41428571428571426
|
|
|
|
key: train_accuracy
|
|
value: [0.44897959 0.46031746 0.42403628 0.41723356 0.43310658 0.42176871
|
|
0.4399093 0.46712018 0.43310658 0.42857143]
|
|
|
|
mean value: 0.43741496598639457
|
|
|
|
key: test_fscore
|
|
value: [0.22857143 0.17142857 0.20512821 0.11764706 0.18181818 0.15384615
|
|
0.12903226 0.13333333 0.26315789 0.12121212]
|
|
|
|
mean value: 0.17051752069628828
|
|
|
|
key: train_fscore
|
|
value: [0.20327869 0.21710526 0.19620253 0.19435737 0.20886076 0.20062696
|
|
0.21086262 0.2192691 0.19871795 0.20754717]
|
|
|
|
mean value: 0.20568284101681436
|
|
|
|
key: test_precision
|
|
value: [0.12903226 0.09677419 0.11428571 0.06666667 0.10344828 0.08571429
|
|
0.07407407 0.07692308 0.15151515 0.07142857]
|
|
|
|
mean value: 0.09698622680825128
|
|
|
|
key: train_precision
|
|
value: [0.11610487 0.12406015 0.11151079 0.11032028 0.11870504 0.113879
|
|
0.12 0.12547529 0.11272727 0.11743772]
|
|
|
|
mean value: 0.1170220415202459
|
|
|
|
key: test_recall
|
|
value: [1. 0.75 1. 0.5 0.75 0.75 0.5 0.5 1. 0.4 ]
|
|
|
|
mean value: 0.715
|
|
|
|
key: train_recall
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[0.81578947 0.86842105 0.81578947 0.81578947 0.86842105 0.84210526
|
|
0.86842105 0.86842105 0.83783784 0.89189189]
|
|
|
|
mean value: 0.8492887624466572
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.56388889 0.65555556 0.43888889 0.58611111 0.51944444
|
|
0.47222222 0.48333333 0.68181818 0.40454545]
|
|
|
|
mean value: 0.5505808080808081
|
|
|
|
key: train_roc_auc
|
|
value: [0.61509077 0.64512864 0.60144312 0.59772104 0.6302403 0.61211963
|
|
0.63396239 0.64885072 0.61693872 0.63901525]
|
|
|
|
mean value: 0.6240510588637974
|
|
|
|
key: test_jcc
|
|
value: [0.12903226 0.09375 0.11428571 0.0625 0.1 0.08333333
|
|
0.06896552 0.07142857 0.15151515 0.06451613]
|
|
|
|
mean value: 0.09393266749009241
|
|
|
|
key: train_jcc
|
|
value: [0.11313869 0.12177122 0.10877193 0.10763889 0.11660777 0.11149826
|
|
0.11785714 0.12313433 0.11032028 0.11578947]
|
|
|
|
mean value: 0.11465279838453969
|
|
|
|
MCC on Blind test: 0.02
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01099086 0.00984645 0.01053452 0.01105261 0.00988722 0.00982928
|
|
0.00986648 0.00990415 0.00991511 0.00977039]
|
|
|
|
mean value: 0.010159707069396973
|
|
|
|
key: score_time
|
|
value: [0.00962877 0.00872302 0.00948071 0.00954485 0.00863194 0.008816
|
|
0.00874686 0.00871634 0.00864601 0.0086863 ]
|
|
|
|
mean value: 0.008962082862854003
|
|
|
|
key: test_mcc
|
|
value: [ 0. -0.12171612 -0.0761387 -0.0761387 -0.0761387 0.05106882
|
|
-0.04303315 -0.0761387 -0.06953841 0.14573048]
|
|
|
|
mean value: -0.03420431671706364
|
|
|
|
key: train_mcc
|
|
value: [0.12898211 0.12381938 0.11522342 0.1096937 0.17568767 0.1096937
|
|
0.13438935 0.13438935 0.08891375 0.13259887]
|
|
|
|
mean value: 0.12533913084841808
|
|
|
|
key: test_accuracy
|
|
value: [0.91836735 0.7755102 0.85714286 0.85714286 0.85714286 0.7755102
|
|
0.89795918 0.85714286 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8510204081632653
|
|
|
|
key: train_accuracy
|
|
value: [0.88208617 0.87981859 0.88662132 0.88435374 0.89115646 0.88435374
|
|
0.88435374 0.88435374 0.87528345 0.88435374]
|
|
|
|
mean value: 0.8836734693877552
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0. 0.15384615
|
|
0. 0. 0. 0.22222222]
|
|
|
|
mean value: 0.037606837606837605
|
|
|
|
key: train_fscore
|
|
value: [0.1875 0.18461538 0.16666667 0.16393443 0.22580645 0.16393443
|
|
0.19047619 0.19047619 0.15384615 0.19047619]
|
|
|
|
mean value: 0.18177320806286962
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 0. 0.11111111
|
|
0. 0. 0. 0.25 ]
|
|
|
|
mean value: 0.03611111111111111
|
|
|
|
key: train_precision
|
|
value: [0.23076923 0.22222222 0.22727273 0.2173913 0.29166667 0.2173913
|
|
0.24 0.24 0.17857143 0.23076923]
|
|
|
|
mean value: 0.22960541149671584
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0. 0.25 0. 0. 0. 0.2 ]
|
|
|
|
mean value: 0.045
|
|
|
|
key: train_recall
|
|
value: [0.15789474 0.15789474 0.13157895 0.13157895 0.18421053 0.13157895
|
|
0.15789474 0.15789474 0.13513514 0.16216216]
|
|
|
|
mean value: 0.1507823613086771
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.42222222 0.46666667 0.46666667 0.46666667 0.53611111
|
|
0.48888889 0.46666667 0.47727273 0.56590909]
|
|
|
|
mean value: 0.4857070707070707
|
|
|
|
key: train_roc_auc
|
|
value: [0.55413347 0.55289278 0.54469766 0.54345697 0.57101345 0.54345697
|
|
0.55537417 0.55537417 0.53910222 0.55632861]
|
|
|
|
mean value: 0.5515830461188006
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0. 0.08333333
|
|
0. 0. 0. 0.125 ]
|
|
|
|
mean value: 0.020833333333333332
|
|
|
|
key: train_jcc
|
|
value: [0.10344828 0.10169492 0.09090909 0.08928571 0.12727273 0.08928571
|
|
0.10526316 0.10526316 0.08333333 0.10526316]
|
|
|
|
mean value: 0.10010192448870968
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00935006 0.01121831 0.0095892 0.01079845 0.00927591 0.01041508
|
|
0.01066661 0.00946712 0.01172566 0.01114678]
|
|
|
|
mean value: 0.01036531925201416
|
|
|
|
key: score_time
|
|
value: [0.0587256 0.01816702 0.01686382 0.0177474 0.01632714 0.01727986
|
|
0.01850748 0.01718521 0.01835418 0.01760244]
|
|
|
|
mean value: 0.021676015853881837
|
|
|
|
key: test_mcc
|
|
value: [ 0. -0.04303315 0. 0. 0. -0.04303315
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: -0.008606629658238704
|
|
|
|
key: train_mcc
|
|
value: [ 0.17120609 0.26951584 0.21980798 0.22632986 0.17120609 0.22632986
|
|
0.17120609 -0.01463905 0.10130416 0.14359318]
|
|
|
|
mean value: 0.1685860108661663
|
|
|
|
key: test_accuracy
|
|
value: [0.91836735 0.89795918 0.91836735 0.91836735 0.91836735 0.89795918
|
|
0.91836735 0.91836735 0.89795918 0.89795918]
|
|
|
|
mean value: 0.9102040816326531
|
|
|
|
key: train_accuracy
|
|
value: [0.91609977 0.92063492 0.91836735 0.91836735 0.91609977 0.91836735
|
|
0.91609977 0.91156463 0.91609977 0.91609977]
|
|
|
|
mean value: 0.916780045351474
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_fscore
|
|
value: [0.09756098 0.14634146 0.1 0.14285714 0.09756098 0.14285714
|
|
0.09756098 0. 0.05128205 0.09756098]
|
|
|
|
mean value: 0.09735817028499955
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_precision
|
|
value: [0.66666667 1. 1. 0.75 0.66666667 0.75
|
|
0.66666667 0. 0.5 0.5 ]
|
|
|
|
mean value: 0.65
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_recall
|
|
value: [0.05263158 0.07894737 0.05263158 0.07894737 0.05263158 0.07894737
|
|
0.05263158 0. 0.02702703 0.05405405]
|
|
|
|
mean value: 0.05284495021337127
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.48888889 0.5 0.5 0.5 0.48888889
|
|
0.5 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.49777777777777776
|
|
|
|
key: train_roc_auc
|
|
value: [0.52507509 0.53947368 0.52631579 0.53823299 0.52507509 0.53823299
|
|
0.52507509 0.49875931 0.51227589 0.52455178]
|
|
|
|
mean value: 0.5253067711045236
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_jcc
|
|
value: [0.05128205 0.07894737 0.05263158 0.07692308 0.05128205 0.07692308
|
|
0.05128205 0. 0.02631579 0.05128205]
|
|
|
|
mean value: 0.05168690958164642
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01750064 0.01720309 0.01666379 0.01516032 0.01604486 0.01654983
|
|
0.0173831 0.017133 0.01583409 0.01600838]
|
|
|
|
mean value: 0.01654810905456543
|
|
|
|
key: score_time
|
|
value: [0.01018834 0.01129341 0.01129961 0.01111412 0.01038837 0.01028562
|
|
0.01131296 0.01097918 0.01142526 0.01179028]
|
|
|
|
mean value: 0.01100771427154541
|
|
|
|
key: test_mcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_mcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91836735 0.91836735 0.91836735 0.91836735 0.91836735 0.91836735
|
|
0.91836735 0.91836735 0.89795918 0.89795918]
|
|
|
|
mean value: 0.9142857142857143
|
|
|
|
key: train_accuracy
|
|
value: [0.9138322 0.9138322 0.9138322 0.9138322 0.9138322 0.9138322
|
|
0.9138322 0.9138322 0.91609977 0.91609977]
|
|
|
|
mean value: 0.9142857142857144
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_fscore
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_precision
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_recall
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
|
|
|
|
mean value: 0.5
|
|
|
|
key: train_roc_auc
|
|
value: [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
|
|
|
|
mean value: 0.5
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_jcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [3.18788648 3.95891047 4.03830957 3.55614758 3.6447401 3.87987447
|
|
3.53843689 3.18595314 3.70476341 3.56264782]
|
|
|
|
mean value: 3.6257669925689697
|
|
|
|
key: score_time
|
|
value: [0.02656841 0.02715898 0.02362847 0.02622104 0.02363467 0.02286148
|
|
0.02396154 0.02025342 0.02307725 0.02346897]
|
|
|
|
mean value: 0.024083423614501952
|
|
|
|
key: test_mcc
|
|
value: [ 0.48412292 -0.04303315 0.31519816 0. -0.04303315 -0.06150208
|
|
0. 0.31519816 0.63819901 -0.06953841]
|
|
|
|
mean value: 0.1535611473097351
|
|
|
|
key: train_mcc
|
|
value: [0.91090704 0.98553239 0.97092227 1. 1. 0.97092227
|
|
0.94124564 0.8479392 0.95506312 0.97019901]
|
|
|
|
mean value: 0.9552730953431728
|
|
|
|
key: test_accuracy
|
|
value: [0.93877551 0.89795918 0.91836735 0.91836735 0.89795918 0.87755102
|
|
0.91836735 0.91836735 0.93877551 0.85714286]
|
|
|
|
mean value: 0.9081632653061225
|
|
|
|
key: train_accuracy
|
|
value: [0.98639456 0.99773243 0.99546485 1. 1. 0.99546485
|
|
0.99092971 0.97732426 0.99319728 0.99546485]
|
|
|
|
mean value: 0.9931972789115646
|
|
|
|
key: test_fscore
|
|
value: [0.4 0. 0.33333333 0. 0. 0.
|
|
0. 0.33333333 0.66666667 0. ]
|
|
|
|
mean value: 0.1733333333333333
|
|
|
|
key: train_fscore
|
|
value: [0.91428571 0.98666667 0.97297297 1. 1. 0.97297297
|
|
0.94444444 0.84848485 0.95774648 0.97222222]
|
|
|
|
mean value: 0.9569796320923082
|
|
|
|
key: test_precision
|
|
value: [1. 0. 0.5 0. 0. 0. 0. 0.5 0.75 0. ]
|
|
|
|
mean value: 0.275
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.25 0. 0.25 0. 0. 0. 0. 0.25 0.6 0. ]
|
|
|
|
mean value: 0.135
|
|
|
|
key: train_recall
|
|
value: [0.84210526 0.97368421 0.94736842 1. 1. 0.94736842
|
|
0.89473684 0.73684211 0.91891892 0.94594595]
|
|
|
|
mean value: 0.920697012802276
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.48888889 0.61388889 0.5 0.48888889 0.47777778
|
|
0.5 0.61388889 0.78863636 0.47727273]
|
|
|
|
mean value: 0.5574242424242424
|
|
|
|
key: train_roc_auc
|
|
value: [0.92105263 0.98684211 0.97368421 1. 1. 0.97368421
|
|
0.94736842 0.86842105 0.95945946 0.97297297]
|
|
|
|
mean value: 0.960348506401138
|
|
|
|
key: test_jcc
|
|
value: [0.25 0. 0.2 0. 0. 0. 0. 0.2 0.5 0. ]
|
|
|
|
mean value: 0.115
|
|
|
|
key: train_jcc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[0.84210526 0.97368421 0.94736842 1. 1. 0.94736842
|
|
0.89473684 0.73684211 0.91891892 0.94594595]
|
|
|
|
mean value: 0.920697012802276
|
|
|
|
MCC on Blind test: -0.05
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04444218 0.02825999 0.02666593 0.02614236 0.02603054 0.02829456
|
|
0.02728558 0.02906346 0.02901769 0.02945304]
|
|
|
|
mean value: 0.029465532302856444
|
|
|
|
key: score_time
|
|
value: [0.01252174 0.01257515 0.01247454 0.0125432 0.01256418 0.01267648
|
|
0.01272273 0.01255369 0.01256275 0.0126338 ]
|
|
|
|
mean value: 0.012582826614379882
|
|
|
|
key: test_mcc
|
|
value: [0.39196475 0.45555556 0.69189841 0.48412292 0.31519816 0.18333333
|
|
0.4732871 0.23476099 0.39196475 0.44038551]
|
|
|
|
mean value: 0.40624714641286286
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.89795918 0.91836735 0.95918367 0.93877551 0.91836735 0.87755102
|
|
0.87755102 0.89795918 0.89795918 0.87755102]
|
|
|
|
mean value: 0.9061224489795918
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.44444444 0.5 0.66666667 0.4 0.33333333 0.25
|
|
0.5 0.28571429 0.44444444 0.5 ]
|
|
|
|
mean value: 0.4324603174603175
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.4 0.5 1. 1. 0.5 0.25
|
|
0.375 0.33333333 0.5 0.42857143]
|
|
|
|
mean value: 0.5286904761904762
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.5 0.5 0.5 0.25 0.25 0.25 0.75 0.25 0.4 0.6 ]
|
|
|
|
mean value: 0.425
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.71666667 0.72777778 0.75 0.625 0.61388889 0.59166667
|
|
0.81944444 0.60277778 0.67727273 0.75454545]
|
|
|
|
mean value: 0.6879040404040404
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.28571429 0.33333333 0.5 0.25 0.2 0.14285714
|
|
0.33333333 0.16666667 0.28571429 0.33333333]
|
|
|
|
mean value: 0.28309523809523807
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.16595054 0.16270828 0.18029261 0.18076944 0.16685081 0.16844392
|
|
0.16526675 0.16804886 0.16927338 0.16556144]
|
|
|
|
mean value: 0.16931660175323487
|
|
|
|
key: score_time
|
|
value: [0.02513862 0.02465153 0.02684212 0.02522922 0.02630067 0.02562571
|
|
0.02609682 0.02581096 0.02552414 0.02598619]
|
|
|
|
mean value: 0.025720596313476562
|
|
|
|
key: test_mcc
|
|
value: [ 0. -0.0761387 0.48412292 -0.04303315 0. -0.04303315
|
|
0. -0.04303315 0.42817442 0.42817442]
|
|
|
|
mean value: 0.11352336132173338
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91836735 0.85714286 0.93877551 0.89795918 0.91836735 0.89795918
|
|
0.91836735 0.89795918 0.91836735 0.91836735]
|
|
|
|
mean value: 0.9081632653061225
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0.4 0. 0. 0.
|
|
0. 0. 0.33333333 0.33333333]
|
|
|
|
mean value: 0.10666666666666667
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0. 0. 1. 0. 0. 0. 0. 0. 1. 1.]
|
|
|
|
mean value: 0.3
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0.25 0. 0. 0. 0. 0. 0.2 0.2 ]
|
|
|
|
mean value: 0.065
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.46666667 0.625 0.48888889 0.5 0.48888889
|
|
0.5 0.48888889 0.6 0.6 ]
|
|
|
|
mean value: 0.5258333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0.25 0. 0. 0. 0. 0. 0.2 0.2 ]
|
|
|
|
mean value: 0.065
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01531649 0.01584315 0.01534104 0.01605606 0.01548171 0.01535535
|
|
0.01499534 0.01563549 0.01619029 0.01603174]
|
|
|
|
mean value: 0.015624666213989257
|
|
|
|
key: score_time
|
|
value: [0.01331425 0.01295996 0.01364517 0.01301908 0.01337361 0.01245165
|
|
0.01364255 0.01286006 0.01346326 0.01277661]
|
|
|
|
mean value: 0.013150620460510253
|
|
|
|
key: test_mcc
|
|
value: [ 0.18333333 -0.0761387 0.39196475 -0.11136921 -0.10050378 -0.08888889
|
|
-0.10050378 -0.0761387 0.10909091 0.44038551]
|
|
|
|
mean value: 0.057123143768183095
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.87755102 0.85714286 0.89795918 0.79591837 0.81632653 0.83673469
|
|
0.81632653 0.85714286 0.83673469 0.87755102]
|
|
|
|
mean value: 0.846938775510204
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.25 0. 0.44444444 0. 0. 0.
|
|
0. 0. 0.2 0.5 ]
|
|
|
|
mean value: 0.13944444444444445
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.25 0. 0.4 0. 0. 0.
|
|
0. 0. 0.2 0.42857143]
|
|
|
|
mean value: 0.12785714285714286
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.25 0. 0.5 0. 0. 0. 0. 0. 0.2 0.6 ]
|
|
|
|
mean value: 0.155
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.59166667 0.46666667 0.71666667 0.43333333 0.44444444 0.45555556
|
|
0.44444444 0.46666667 0.55454545 0.75454545]
|
|
|
|
mean value: 0.5328535353535353
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.14285714 0. 0.28571429 0. 0. 0.
|
|
0. 0. 0.11111111 0.33333333]
|
|
|
|
mean value: 0.0873015873015873
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.09
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Random Forest
|
|
Model func: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.511657 2.6609199 2.73651624 2.70599651 2.56802678 2.57371593
|
|
2.9541688 2.39135957 1.58392644 1.57593513]
|
|
|
|
mean value: 2.426222229003906
|
|
|
|
key: score_time
|
|
value: [0.12711954 0.14419365 0.15040994 0.21242857 0.12433434 0.12565756
|
|
0.12718511 0.09471846 0.09949708 0.09562325]
|
|
|
|
mean value: 0.13011674880981444
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0. 0.48412292 0. 0. -0.04303315
|
|
0. 0. 0. 0.42817442]
|
|
|
|
mean value: 0.08692641892735713
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91836735 0.91836735 0.93877551 0.91836735 0.91836735 0.89795918
|
|
0.91836735 0.91836735 0.89795918 0.91836735]
|
|
|
|
mean value: 0.9163265306122449
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0.4 0. 0. 0.
|
|
0. 0. 0. 0.33333333]
|
|
|
|
mean value: 0.07333333333333333
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0. 0. 1. 0. 0. 0. 0. 0. 0. 1.]
|
|
|
|
mean value: 0.2
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0.25 0. 0. 0. 0. 0. 0. 0.2 ]
|
|
|
|
mean value: 0.045
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.625 0.5 0.5 0.48888889
|
|
0.5 0.5 0.5 0.6 ]
|
|
|
|
mean value: 0.5213888888888889
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0.25 0. 0. 0. 0. 0. 0. 0.2 ]
|
|
|
|
mean value: 0.045
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
|
|
key: fit_time
|
|
value: [1.87240267 0.92059731 0.94270301 1.02526331 0.97388744 1.00820589
|
|
0.98671389 0.98033094 0.94372177 1.00554228]
|
|
|
|
mean value: 1.0659368515014649
|
|
|
|
key: score_time
|
|
value: [0.22457266 0.18168974 0.17041016 0.23129153 0.14785528 0.19765759
|
|
0.17161727 0.16478586 0.20402384 0.18627763]
|
|
|
|
mean value: 0.18801815509796144
|
|
|
|
key: test_mcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_mcc
|
|
value: [0.21980798 0.15525099 0.15525099 0.21980798 0.26951584 0.21980798
|
|
0.15525099 0.21980798 0. 0. ]
|
|
|
|
mean value: 0.16145007550038468
|
|
|
|
key: test_accuracy
|
|
value: [0.91836735 0.91836735 0.91836735 0.91836735 0.91836735 0.91836735
|
|
0.91836735 0.91836735 0.89795918 0.89795918]
|
|
|
|
mean value: 0.9142857142857143
|
|
|
|
key: train_accuracy
|
|
value: [0.91836735 0.91609977 0.91609977 0.91836735 0.92063492 0.91836735
|
|
0.91609977 0.91836735 0.91609977 0.91609977]
|
|
|
|
mean value: 0.9174603174603174
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_fscore
|
|
value: [0.1 0.05128205 0.05128205 0.1 0.14634146 0.1
|
|
0.05128205 0.1 0. 0. ]
|
|
|
|
mean value: 0.0700187617260788
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 0. 0.]
|
|
|
|
mean value: 0.8
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_recall
|
|
value: [0.05263158 0.02631579 0.02631579 0.05263158 0.07894737 0.05263158
|
|
0.02631579 0.05263158 0. 0. ]
|
|
|
|
mean value: 0.03684210526315789
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
|
|
|
|
mean value: 0.5
|
|
|
|
key: train_roc_auc
|
|
value: [0.52631579 0.51315789 0.51315789 0.52631579 0.53947368 0.52631579
|
|
0.51315789 0.52631579 0.5 0.5 ]
|
|
|
|
mean value: 0.5184210526315789
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_jcc
|
|
value: [0.05263158 0.02631579 0.02631579 0.05263158 0.07894737 0.05263158
|
|
0.02631579 0.05263158 0. 0. ]
|
|
|
|
mean value: 0.03684210526315789
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02214622 0.01019287 0.01012945 0.01093054 0.01107836 0.01018834
|
|
0.01045132 0.01022482 0.01037598 0.01046848]
|
|
|
|
mean value: 0.011618638038635254
|
|
|
|
key: score_time
|
|
value: [0.00954342 0.00898814 0.01015663 0.00912404 0.00890636 0.00884342
|
|
0.00895214 0.00912237 0.00912523 0.00937796]
|
|
|
|
mean value: 0.009213972091674804
|
|
|
|
key: test_mcc
|
|
value: [ 0. -0.12171612 -0.0761387 -0.0761387 -0.0761387 0.05106882
|
|
-0.04303315 -0.0761387 -0.06953841 0.14573048]
|
|
|
|
mean value: -0.03420431671706364
|
|
|
|
key: train_mcc
|
|
value: [0.12898211 0.12381938 0.11522342 0.1096937 0.17568767 0.1096937
|
|
0.13438935 0.13438935 0.08891375 0.13259887]
|
|
|
|
mean value: 0.12533913084841808
|
|
|
|
key: test_accuracy
|
|
value: [0.91836735 0.7755102 0.85714286 0.85714286 0.85714286 0.7755102
|
|
0.89795918 0.85714286 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8510204081632653
|
|
|
|
key: train_accuracy
|
|
value: [0.88208617 0.87981859 0.88662132 0.88435374 0.89115646 0.88435374
|
|
0.88435374 0.88435374 0.87528345 0.88435374]
|
|
|
|
mean value: 0.8836734693877552
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0. 0.15384615
|
|
0. 0. 0. 0.22222222]
|
|
|
|
mean value: 0.037606837606837605
|
|
|
|
key: train_fscore
|
|
value: [0.1875 0.18461538 0.16666667 0.16393443 0.22580645 0.16393443
|
|
0.19047619 0.19047619 0.15384615 0.19047619]
|
|
|
|
mean value: 0.18177320806286962
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 0. 0.11111111
|
|
0. 0. 0. 0.25 ]
|
|
|
|
mean value: 0.03611111111111111
|
|
|
|
key: train_precision
|
|
value: [0.23076923 0.22222222 0.22727273 0.2173913 0.29166667 0.2173913
|
|
0.24 0.24 0.17857143 0.23076923]
|
|
|
|
mean value: 0.22960541149671584
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0. 0.25 0. 0. 0. 0.2 ]
|
|
|
|
mean value: 0.045
|
|
|
|
key: train_recall
|
|
value: [0.15789474 0.15789474 0.13157895 0.13157895 0.18421053 0.13157895
|
|
0.15789474 0.15789474 0.13513514 0.16216216]
|
|
|
|
mean value: 0.1507823613086771
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.42222222 0.46666667 0.46666667 0.46666667 0.53611111
|
|
0.48888889 0.46666667 0.47727273 0.56590909]
|
|
|
|
mean value: 0.4857070707070707
|
|
|
|
key: train_roc_auc
|
|
value: [0.55413347 0.55289278 0.54469766 0.54345697 0.57101345 0.54345697
|
|
0.55537417 0.55537417 0.53910222 0.55632861]
|
|
|
|
mean value: 0.5515830461188006
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0. 0.08333333
|
|
0. 0. 0. 0.125 ]
|
|
|
|
mean value: 0.020833333333333332
|
|
|
|
key: train_jcc
|
|
value: [0.10344828 0.10169492 0.09090909 0.08928571 0.12727273 0.08928571
|
|
0.10526316 0.10526316 0.08333333 0.10526316]
|
|
|
|
mean value: 0.10010192448870968
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: XGBoost
|
|
Model func: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [1.91404438 1.95080733 1.91543627 1.86043501 1.85784006 1.88353372
|
|
1.8658371 1.87171507 1.92222667 1.89931059]
|
|
|
|
mean value: 1.8941186189651489
|
|
|
|
key: score_time
|
|
value: [0.01306772 0.01308584 0.01254106 0.01320124 0.01275206 0.01218367
|
|
0.01292706 0.01304483 0.01314259 0.01243806]
|
|
|
|
mean value: 0.012838411331176757
|
|
|
|
key: test_mcc
|
|
value: [0.45555556 0.69189841 0.31519816 0. 0.23476099 0.18333333
|
|
0.39196475 0.18333333 0.61193797 0.63819901]
|
|
|
|
mean value: 0.37061815143644256
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91836735 0.95918367 0.91836735 0.91836735 0.89795918 0.87755102
|
|
0.89795918 0.87755102 0.93877551 0.93877551]
|
|
|
|
mean value: 0.9142857142857143
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.66666667 0.33333333 0. 0.28571429 0.25
|
|
0.44444444 0.25 0.57142857 0.66666667]
|
|
|
|
mean value: 0.3968253968253968
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 1. 0.5 0. 0.33333333 0.25
|
|
0.4 0.25 1. 0.75 ]
|
|
|
|
mean value: 0.49833333333333335
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.5 0.5 0.25 0. 0.25 0.25 0.5 0.25 0.4 0.6 ]
|
|
|
|
mean value: 0.35
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.72777778 0.75 0.61388889 0.5 0.60277778 0.59166667
|
|
0.71666667 0.59166667 0.7 0.78863636]
|
|
|
|
mean value: 0.6583080808080808
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.5 0.2 0. 0.16666667 0.14285714
|
|
0.28571429 0.14285714 0.4 0.5 ]
|
|
|
|
mean value: 0.2671428571428571
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04829502 0.08163071 0.08650327 0.08173466 0.09164286 0.07776999
|
|
0.08226776 0.07278728 0.0728929 0.07288504]
|
|
|
|
mean value: 0.07684094905853271
|
|
|
|
key: score_time
|
|
value: [0.02097511 0.02415705 0.02215767 0.02176356 0.02373457 0.01368833
|
|
0.02113962 0.02425647 0.02468157 0.02432299]
|
|
|
|
mean value: 0.022087693214416504
|
|
|
|
key: test_mcc
|
|
value: [-0.06150208 -0.08888889 0.54566067 -0.04303315 -0.0761387 -0.0761387
|
|
0. 0.51729353 0.77727273 0.10909091]
|
|
|
|
mean value: 0.1603616322121507
|
|
|
|
key: train_mcc
|
|
value: [0.63055732 0.67419865 0.63576126 0.68710837 0.6804679 0.70530396
|
|
0.65721347 0.69285726 0.64597271 0.67909883]
|
|
|
|
mean value: 0.6688539706509772
|
|
|
|
key: test_accuracy
|
|
value: [0.87755102 0.83673469 0.93877551 0.89795918 0.85714286 0.85714286
|
|
0.91836735 0.89795918 0.95918367 0.83673469]
|
|
|
|
mean value: 0.8877551020408163
|
|
|
|
key: train_accuracy
|
|
value: [0.94557823 0.95238095 0.94331066 0.95238095 0.95238095 0.95464853
|
|
0.9478458 0.95464853 0.94557823 0.95238095]
|
|
|
|
mean value: 0.9501133786848073
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0.57142857 0. 0. 0.
|
|
0. 0.54545455 0.8 0.2 ]
|
|
|
|
mean value: 0.2116883116883117
|
|
|
|
key: train_fscore
|
|
value: [0.65714286 0.69565217 0.66666667 0.71232877 0.70422535 0.72972973
|
|
0.68493151 0.71428571 0.67567568 0.70422535]
|
|
|
|
mean value: 0.6944863795611642
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0.66666667 0. 0. 0.
|
|
0. 0.42857143 0.8 0.2 ]
|
|
|
|
mean value: 0.20952380952380953
|
|
|
|
key: train_precision
|
|
value: [0.71875 0.77419355 0.67567568 0.74285714 0.75757576 0.75
|
|
0.71428571 0.78125 0.67567568 0.73529412]
|
|
|
|
mean value: 0.7325557632104122
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0.5 0. 0. 0. 0. 0.75 0.8 0.2 ]
|
|
|
|
mean value: 0.225
|
|
|
|
key: train_recall
|
|
value: [0.60526316 0.63157895 0.65789474 0.68421053 0.65789474 0.71052632
|
|
0.65789474 0.65789474 0.67567568 0.67567568]
|
|
|
|
mean value: 0.6614509246088194
|
|
|
|
key: test_roc_auc
|
|
value: [0.47777778 0.45555556 0.73888889 0.48888889 0.46666667 0.46666667
|
|
0.5 0.83055556 0.88863636 0.55454545]
|
|
|
|
mean value: 0.5868181818181818
|
|
|
|
key: train_roc_auc
|
|
value: [0.79146533 0.80710461 0.81405903 0.83093901 0.81902181 0.8440969
|
|
0.81654042 0.8202625 0.82298635 0.82669922]
|
|
|
|
mean value: 0.8193175194009381
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0.4 0. 0. 0.
|
|
0. 0.375 0.66666667 0.11111111]
|
|
|
|
mean value: 0.15527777777777776
|
|
|
|
key: train_jcc
|
|
value: [0.4893617 0.53333333 0.5 0.55319149 0.54347826 0.57446809
|
|
0.52083333 0.55555556 0.51020408 0.54347826]
|
|
|
|
mean value: 0.532390410218975
|
|
|
|
MCC on Blind test: -0.07
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01636457 0.01058745 0.0118866 0.01034212 0.01019907 0.01062298
|
|
0.01030517 0.01025558 0.01034403 0.01009321]
|
|
|
|
mean value: 0.011100077629089355
|
|
|
|
key: score_time
|
|
value: [0.00971293 0.00916743 0.00885105 0.00919914 0.00887847 0.00926256
|
|
0.00955653 0.00959873 0.00913906 0.00894642]
|
|
|
|
mean value: 0.009231233596801757
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0. -0.04303315 0. 0.48412292 0.
|
|
0. -0.06150208 -0.04865618 -0.06953841]
|
|
|
|
mean value: 0.026139309920244498
|
|
|
|
key: train_mcc
|
|
value: [0.20045625 0.17320066 0.22463629 0.15496613 0.10344587 0.17320066
|
|
0.17035431 0.20986142 0.24687242 0.26627597]
|
|
|
|
mean value: 0.19232699776493647
|
|
|
|
key: test_accuracy
|
|
value: [0.91836735 0.91836735 0.89795918 0.91836735 0.93877551 0.91836735
|
|
0.91836735 0.87755102 0.87755102 0.85714286]
|
|
|
|
mean value: 0.9040816326530612
|
|
|
|
key: train_accuracy
|
|
value: [0.9138322 0.9138322 0.9138322 0.91156463 0.90929705 0.9138322
|
|
0.90929705 0.90702948 0.92063492 0.91836735]
|
|
|
|
mean value: 0.9131519274376417
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0.4 0. 0. 0. 0. 0. ]
|
|
|
|
mean value: 0.04
|
|
|
|
key: train_fscore
|
|
value: [0.17391304 0.13636364 0.20833333 0.13333333 0.09090909 0.13636364
|
|
0.16666667 0.22641509 0.18604651 0.25 ]
|
|
|
|
mean value: 0.17083443464154874
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.1
|
|
|
|
key: train_precision
|
|
value: [0.5 0.5 0.5 0.42857143 0.33333333 0.5
|
|
0.4 0.4 0.66666667 0.54545455]
|
|
|
|
mean value: 0.4774025974025974
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0.25 0. 0. 0. 0. 0. ]
|
|
|
|
mean value: 0.025
|
|
|
|
key: train_recall
|
|
value: [0.10526316 0.07894737 0.13157895 0.07894737 0.05263158 0.07894737
|
|
0.10526316 0.15789474 0.10810811 0.16216216]
|
|
|
|
mean value: 0.10597439544807966
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.48888889 0.5 0.625 0.5
|
|
0.5 0.47777778 0.48863636 0.47727273]
|
|
|
|
mean value: 0.5057575757575757
|
|
|
|
key: train_roc_auc
|
|
value: [0.5476688 0.5357516 0.559586 0.53451091 0.52135301 0.5357516
|
|
0.54518741 0.56778112 0.55157881 0.57489296]
|
|
|
|
mean value: 0.5474062208918653
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0.25 0. 0. 0. 0. 0. ]
|
|
|
|
mean value: 0.025
|
|
|
|
key: train_jcc
|
|
value: [0.0952381 0.07317073 0.11627907 0.07142857 0.04761905 0.07317073
|
|
0.09090909 0.12765957 0.1025641 0.14285714]
|
|
|
|
mean value: 0.09408961582662118
|
|
|
|
MCC on Blind test: -0.05
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01483154 0.01903224 0.02042961 0.02135682 0.01776505 0.01605248
|
|
0.01944971 0.01732755 0.02086616 0.01699185]
|
|
|
|
mean value: 0.018410301208496092
|
|
|
|
key: score_time
|
|
value: [0.00925016 0.01185584 0.01208711 0.01204515 0.0121758 0.0120337
|
|
0.01207423 0.01195645 0.01202345 0.01198649]
|
|
|
|
mean value: 0.011748838424682616
|
|
|
|
key: test_mcc
|
|
value: [0. 0. 0. 0. 0. 0.
|
|
0. 0.39196475 0.88443328 0. ]
|
|
|
|
mean value: 0.12763980253791993
|
|
|
|
key: train_mcc
|
|
value: [0.15525099 0.31156596 0.15525099 0.31156596 0.34874058 0.4949887
|
|
0.34874058 0.52615818 0.67273657 0.41909858]
|
|
|
|
mean value: 0.37440970956266323
|
|
|
|
key: test_accuracy
|
|
value: [0.91836735 0.91836735 0.91836735 0.91836735 0.91836735 0.91836735
|
|
0.91836735 0.89795918 0.97959184 0.89795918]
|
|
|
|
mean value: 0.9204081632653062
|
|
|
|
key: train_accuracy
|
|
value: [0.91609977 0.92290249 0.91609977 0.92290249 0.92517007 0.93650794
|
|
0.92517007 0.93877551 0.94331066 0.93197279]
|
|
|
|
mean value: 0.927891156462585
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0. 0.
|
|
0. 0.44444444 0.88888889 0. ]
|
|
|
|
mean value: 0.13333333333333333
|
|
|
|
key: train_fscore
|
|
value: [0.05128205 0.19047619 0.05128205 0.19047619 0.23255814 0.44
|
|
0.23255814 0.50909091 0.69879518 0.34782609]
|
|
|
|
mean value: 0.2944344939356573
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0.4 1. 0. ]
|
|
|
|
mean value: 0.14
|
|
|
|
key: train_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[1. 1. 1. 1. 1. 0.91666667
|
|
1. 0.82352941 0.63043478 0.88888889]
|
|
|
|
mean value: 0.9259519749928957
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0.5 0.8 0. ]
|
|
|
|
mean value: 0.13
|
|
|
|
key: train_recall
|
|
value: [0.02631579 0.10526316 0.02631579 0.10526316 0.13157895 0.28947368
|
|
0.13157895 0.36842105 0.78378378 0.21621622]
|
|
|
|
mean value: 0.21842105263157893
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.5 0.5 0.5 0.5
|
|
0.5 0.71666667 0.9 0.5 ]
|
|
|
|
mean value: 0.5616666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [0.51315789 0.55263158 0.51315789 0.55263158 0.56578947 0.64349615
|
|
0.56578947 0.68048844 0.87085229 0.60687048]
|
|
|
|
mean value: 0.6064865256278795
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0. 0.
|
|
0. 0.28571429 0.8 0. ]
|
|
|
|
mean value: 0.10857142857142857
|
|
|
|
key: train_jcc
|
|
value: [0.02631579 0.10526316 0.02631579 0.10526316 0.13157895 0.28205128
|
|
0.13157895 0.34146341 0.53703704 0.21052632]
|
|
|
|
mean value: 0.18973938389856235
|
|
|
|
MCC on Blind test: -0.01
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0171895 0.0202055 0.02057743 0.02127457 0.01993489 0.0188067
|
|
0.01804399 0.02111411 0.01781034 0.02032399]
|
|
|
|
mean value: 0.01952810287475586
|
|
|
|
key: score_time
|
|
value: [0.00938988 0.01202083 0.01198673 0.0120945 0.0120728 0.01201248
|
|
0.01205993 0.01217771 0.01196194 0.01210904]
|
|
|
|
mean value: 0.011788582801818848
|
|
|
|
key: test_mcc
|
|
value: [0. 0.23476099 0.39196475 0. 0. 0.
|
|
0. 0.23476099 0.42817442 0.01421338]
|
|
|
|
mean value: 0.13038745240335475
|
|
|
|
key: train_mcc
|
|
value: [0.46904622 0.68224389 0.71901653 0.63877615 0.54543679 0.15525099
|
|
0.15525099 0.56755835 0. 0.66790017]
|
|
|
|
mean value: 0.46004800748540514
|
|
|
|
key: test_accuracy
|
|
value: [0.91836735 0.89795918 0.89795918 0.91836735 0.91836735 0.91836735
|
|
0.91836735 0.89795918 0.91836735 0.75510204]
|
|
|
|
mean value: 0.8959183673469387
|
|
|
|
key: train_accuracy
|
|
value: [0.93424036 0.95464853 0.95464853 0.95011338 0.94104308 0.91609977
|
|
0.91609977 0.94331066 0.91609977 0.93650794]
|
|
|
|
mean value: 0.936281179138322
|
|
|
|
key: test_fscore
|
|
value: [0. 0.28571429 0.44444444 0. 0. 0.
|
|
0. 0.28571429 0.33333333 0.14285714]
|
|
|
|
mean value: 0.14920634920634923
|
|
|
|
key: train_fscore
|
|
value: [0.40816327 0.6969697 0.74358974 0.64516129 0.51851852 0.05128205
|
|
0.05128205 0.50980392 0. 0.68888889]
|
|
|
|
mean value: 0.4313659427728281
|
|
|
|
key: test_precision
|
|
value: [0. 0.33333333 0.4 0. 0. 0.
|
|
0. 0.33333333 1. 0.11111111]
|
|
|
|
mean value: 0.21777777777777776
|
|
|
|
key: train_precision
|
|
value: [0.90909091 0.82142857 0.725 0.83333333 0.875 1.
|
|
1. 1. 0. 0.58490566]
|
|
|
|
mean value: 0.7748758474230172
|
|
|
|
key: test_recall
|
|
value: [0. 0.25 0.5 0. 0. 0. 0. 0.25 0.2 0.2 ]
|
|
|
|
mean value: 0.14
|
|
|
|
key: train_recall
|
|
value: [0.26315789 0.60526316 0.76315789 0.52631579 0.36842105 0.02631579
|
|
0.02631579 0.34210526 0. 0.83783784]
|
|
|
|
mean value: 0.3758890469416785
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.60277778 0.71666667 0.5 0.5 0.5
|
|
0.5 0.60277778 0.6 0.50909091]
|
|
|
|
mean value: 0.5531313131313131
|
|
|
|
key: train_roc_auc
|
|
value: [0.63033825 0.79642811 0.8679313 0.75819512 0.68172914 0.51315789
|
|
0.51315789 0.67105263 0.5 0.8916912 ]
|
|
|
|
mean value: 0.6823681531787231
|
|
|
|
key: test_jcc
|
|
value: [0. 0.16666667 0.28571429 0. 0. 0.
|
|
0. 0.16666667 0.2 0.07692308]
|
|
|
|
mean value: 0.08959706959706959
|
|
|
|
key: train_jcc
|
|
value: [0.25641026 0.53488372 0.59183673 0.47619048 0.35 0.02631579
|
|
0.02631579 0.34210526 0. 0.52542373]
|
|
|
|
mean value: 0.3129481759143665
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.39
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.17346883 0.16672826 0.16715455 0.16484785 0.16440916 0.16464901
|
|
0.16453218 0.16446137 0.23316431 0.23558855]
|
|
|
|
mean value: 0.1799004077911377
|
|
|
|
key: score_time
|
|
value: [0.01536727 0.0161221 0.01689577 0.01533723 0.01530814 0.01518321
|
|
0.0152173 0.01604867 0.01753139 0.0160749 ]
|
|
|
|
mean value: 0.01590859889984131
|
|
|
|
key: test_mcc
|
|
value: [ 0.11600959 0.31519816 0.23476099 -0.04303315 0.48412292 0.18333333
|
|
0.18333333 -0.08888889 0.75757049 0.42817442]
|
|
|
|
mean value: 0.2570581198109228
|
|
|
|
key: train_mcc
|
|
value: [0.98553239 1. 0.98553239 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9971064789492542
|
|
|
|
key: test_accuracy
|
|
value: [0.83673469 0.91836735 0.89795918 0.89795918 0.93877551 0.87755102
|
|
0.87755102 0.83673469 0.95918367 0.91836735]
|
|
|
|
mean value: 0.8959183673469387
|
|
|
|
key: train_accuracy
|
|
value: [0.99773243 1. 0.99773243 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.999546485260771
|
|
|
|
key: test_fscore
|
|
value: [0.2 0.33333333 0.28571429 0. 0.4 0.25
|
|
0.25 0. 0.75 0.33333333]
|
|
|
|
mean value: 0.28023809523809523
|
|
|
|
key: train_fscore
|
|
value: [0.98666667 1. 0.98666667 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9973333333333333
|
|
|
|
key: test_precision
|
|
value: [0.16666667 0.5 0.33333333 0. 1. 0.25
|
|
0.25 0. 1. 1. ]
|
|
|
|
mean value: 0.45
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.25 0.25 0.25 0. 0.25 0.25 0.25 0. 0.6 0.2 ]
|
|
|
|
mean value: 0.23
|
|
|
|
key: train_recall
|
|
value: [0.97368421 1. 0.97368421 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9947368421052631
|
|
|
|
key: test_roc_auc
|
|
value: [0.56944444 0.61388889 0.60277778 0.48888889 0.625 0.59166667
|
|
0.59166667 0.45555556 0.8 0.6 ]
|
|
|
|
mean value: 0.5938888888888889
|
|
|
|
key: train_roc_auc
|
|
value: [0.98684211 1. 0.98684211 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9973684210526316
|
|
|
|
key: test_jcc
|
|
value: [0.11111111 0.2 0.16666667 0. 0.25 0.14285714
|
|
0.14285714 0. 0.6 0.2 ]
|
|
|
|
mean value: 0.18134920634920634
|
|
|
|
key: train_jcc
|
|
value: [0.97368421 1. 0.97368421 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9947368421052631
|
|
|
|
MCC on Blind test: -0.05
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04897141 0.07136083 0.07262063 0.05307555 0.06537533 0.05338025
|
|
0.05486774 0.0607903 0.05490017 0.09765506]
|
|
|
|
mean value: 0.06329972743988037
|
|
|
|
key: score_time
|
|
value: [0.01924968 0.02025127 0.03471756 0.0237031 0.03000498 0.02125335
|
|
0.02229643 0.02267528 0.02725077 0.03051805]
|
|
|
|
mean value: 0.02519204616546631
|
|
|
|
key: test_mcc
|
|
value: [ 0.31519816 0.54566067 0.54566067 0. 0.23476099 -0.06150208
|
|
0.18333333 0.23476099 0.63819901 0.47635114]
|
|
|
|
mean value: 0.3112422892711525
|
|
|
|
key: train_mcc
|
|
value: [0.95616255 0.91095157 0.94140913 0.9563848 0.9261634 0.97120282
|
|
0.97092227 0.98553239 0.92427915 0.95506312]
|
|
|
|
mean value: 0.9498071203181968
|
|
|
|
key: test_accuracy
|
|
value: [0.91836735 0.93877551 0.93877551 0.91836735 0.89795918 0.87755102
|
|
0.87755102 0.89795918 0.93877551 0.91836735]
|
|
|
|
mean value: 0.9122448979591837
|
|
|
|
key: train_accuracy
|
|
value: [0.99319728 0.98639456 0.99092971 0.99319728 0.98866213 0.99546485
|
|
0.99546485 0.99773243 0.98866213 0.99319728]
|
|
|
|
mean value: 0.9922902494331066
|
|
|
|
key: test_fscore
|
|
value: [0.33333333 0.57142857 0.57142857 0. 0.28571429 0.
|
|
0.25 0.28571429 0.66666667 0.5 ]
|
|
|
|
mean value: 0.3464285714285714
|
|
|
|
key: train_fscore
|
|
value: [0.95890411 0.91666667 0.94594595 0.96 0.92957746 0.97368421
|
|
0.97297297 0.98666667 0.92753623 0.95774648]
|
|
|
|
mean value: 0.9529700747913639
|
|
|
|
key: test_precision
|
|
value: [0.5 0.66666667 0.66666667 0. 0.33333333 0.
|
|
0.25 0.33333333 0.75 0.66666667]
|
|
|
|
mean value: 0.41666666666666663
|
|
|
|
key: train_precision
|
|
value: [1. 0.97058824 0.97222222 0.97297297 1. 0.97368421
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9889467641015629
|
|
|
|
key: test_recall
|
|
value: [0.25 0.5 0.5 0. 0.25 0. 0.25 0.25 0.6 0.4 ]
|
|
|
|
mean value: 0.3
|
|
|
|
key: train_recall
|
|
value: [0.92105263 0.86842105 0.92105263 0.94736842 0.86842105 0.97368421
|
|
0.94736842 0.97368421 0.86486486 0.91891892]
|
|
|
|
mean value: 0.9204836415362732
|
|
|
|
key: test_roc_auc
|
|
value: [0.61388889 0.73888889 0.73888889 0.5 0.60277778 0.47777778
|
|
0.59166667 0.60277778 0.78863636 0.68863636]
|
|
|
|
mean value: 0.6343939393939394
|
|
|
|
key: train_roc_auc
|
|
value: [0.96052632 0.93296983 0.95928562 0.97244352 0.93421053 0.98560141
|
|
0.97368421 0.98684211 0.93243243 0.95945946]
|
|
|
|
mean value: 0.9597455428525038
|
|
|
|
key: test_jcc
|
|
value: [0.2 0.4 0.4 0. 0.16666667 0.
|
|
0.14285714 0.16666667 0.5 0.33333333]
|
|
|
|
mean value: 0.23095238095238096
|
|
|
|
key: train_jcc
|
|
value: [0.92105263 0.84615385 0.8974359 0.92307692 0.86842105 0.94871795
|
|
0.94736842 0.97368421 0.86486486 0.91891892]
|
|
|
|
mean value: 0.9109694714957873
|
|
|
|
MCC on Blind test: 0.85
|
|
|
|
Accuracy on Blind test: 0.98
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.20358229 0.26056981 0.20863891 0.18940997 0.19927526 0.20116067
|
|
0.19218516 0.19440627 0.17027235 0.70204473]
|
|
|
|
mean value: 0.252154541015625
|
|
|
|
key: score_time
|
|
value: [0.02740455 0.04114413 0.02895832 0.02369118 0.02096534 0.02365518
|
|
0.0233798 0.02371001 0.02752447 0.04003978]
|
|
|
|
mean value: 0.02804727554321289
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0. 0.48412292 -0.04303315 -0.0761387 -0.04303315
|
|
0. -0.04303315 0. 0. ]
|
|
|
|
mean value: 0.027888477463965847
|
|
|
|
key: train_mcc
|
|
value: [0.70979784 0.78126491 0.78126491 0.70979784 0.7281917 0.79833297
|
|
0.76389999 0.74621685 0.70114912 0.77511403]
|
|
|
|
mean value: 0.7495030165904599
|
|
|
|
key: test_accuracy
|
|
value: [0.91836735 0.91836735 0.93877551 0.89795918 0.85714286 0.89795918
|
|
0.91836735 0.89795918 0.89795918 0.89795918]
|
|
|
|
mean value: 0.9040816326530612
|
|
|
|
key: train_accuracy
|
|
value: [0.95918367 0.96825397 0.96825397 0.95918367 0.96145125 0.97052154
|
|
0.96598639 0.96371882 0.95918367 0.96825397]
|
|
|
|
mean value: 0.9643990929705215
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0.4 0. 0. 0. 0. 0. 0. 0. ]
|
|
|
|
mean value: 0.04
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[0.68965517 0.77419355 0.77419355 0.68965517 0.71186441 0.79365079
|
|
0.75409836 0.73333333 0.67857143 0.76666667]
|
|
|
|
mean value: 0.7365882431259401
|
|
|
|
key: test_precision
|
|
value: [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.1
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0.25 0. 0. 0. 0. 0. 0. 0. ]
|
|
|
|
mean value: 0.025
|
|
|
|
key: train_recall
|
|
value: [0.52631579 0.63157895 0.63157895 0.52631579 0.55263158 0.65789474
|
|
0.60526316 0.57894737 0.51351351 0.62162162]
|
|
|
|
mean value: 0.5845661450924609
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.625 0.48888889 0.46666667 0.48888889
|
|
0.5 0.48888889 0.5 0.5 ]
|
|
|
|
mean value: 0.5058333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.76315789 0.81578947 0.81578947 0.76315789 0.77631579 0.82894737
|
|
0.80263158 0.78947368 0.75675676 0.81081081]
|
|
|
|
mean value: 0.7922830725462304
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0.25 0. 0. 0. 0. 0. 0. 0. ]
|
|
|
|
mean value: 0.025
|
|
|
|
key: train_jcc
|
|
value: [0.52631579 0.63157895 0.63157895 0.52631579 0.55263158 0.65789474
|
|
0.60526316 0.57894737 0.51351351 0.62162162]
|
|
|
|
mean value: 0.5845661450924609
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.65099359 0.66221213 0.743016 0.66147923 0.7318399 0.65031075
|
|
0.65333033 0.74388433 0.6669414 0.74673963]
|
|
|
|
mean value: 0.6910747289657593
|
|
|
|
key: score_time
|
|
value: [0.01011682 0.01436234 0.00960088 0.00908041 0.01308346 0.01021147
|
|
0.01009154 0.00949955 0.01065969 0.01546764]
|
|
|
|
mean value: 0.011217379570007324
|
|
|
|
key: test_mcc
|
|
value: [ 0.54566067 0.69189841 0.54566067 -0.04303315 -0.04303315 0.45555556
|
|
0.54566067 0.18333333 0.47635114 0.61193797]
|
|
|
|
mean value: 0.3969992136456525
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.93877551 0.95918367 0.93877551 0.89795918 0.89795918 0.91836735
|
|
0.93877551 0.87755102 0.91836735 0.93877551]
|
|
|
|
mean value: 0.9224489795918367
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.66666667 0.57142857 0. 0. 0.5
|
|
0.57142857 0.25 0.5 0.57142857]
|
|
|
|
mean value: 0.42023809523809524
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 1. 0.66666667 0. 0. 0.5
|
|
0.66666667 0.25 0.66666667 1. ]
|
|
|
|
mean value: 0.5416666666666666
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.5 0.5 0.5 0. 0. 0.5 0.5 0.25 0.4 0.4 ]
|
|
|
|
mean value: 0.355
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.73888889 0.75 0.73888889 0.48888889 0.48888889 0.72777778
|
|
0.73888889 0.59166667 0.68863636 0.7 ]
|
|
|
|
mean value: 0.6652525252525252
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.5 0.4 0. 0. 0.33333333
|
|
0.4 0.14285714 0.33333333 0.4 ]
|
|
|
|
mean value: 0.29095238095238096
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0881784 0.04556727 0.03100348 0.03141069 0.03197694 0.03586626
|
|
0.0362916 0.02916551 0.02885985 0.98168325]
|
|
|
|
mean value: 0.1340003252029419
|
|
|
|
key: score_time
|
|
value: [0.02748942 0.01332903 0.01323581 0.01288056 0.01269412 0.01340675
|
|
0.01357985 0.01266384 0.01294112 0.02256155]
|
|
|
|
mean value: 0.015478205680847169
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0.18333333 -0.04303315 0. -0.06150208 -0.04303315
|
|
0. -0.0761387 0. 0.19513179]
|
|
|
|
mean value: 0.015475804967888404
|
|
|
|
key: train_mcc
|
|
value: [0. 0.15525099 0. 0. 0. 0.15525099
|
|
0. 0. 0. 0.27347233]
|
|
|
|
mean value: 0.058397431701974215
|
|
|
|
key: test_accuracy
|
|
value: [0.91836735 0.87755102 0.89795918 0.91836735 0.87755102 0.89795918
|
|
0.91836735 0.85714286 0.89795918 0.87755102]
|
|
|
|
mean value: 0.8938775510204081
|
|
|
|
key: train_accuracy
|
|
value: [0.9138322 0.91609977 0.9138322 0.9138322 0.9138322 0.91609977
|
|
0.9138322 0.9138322 0.91609977 0.92290249]
|
|
|
|
mean value: 0.9154195011337869
|
|
|
|
key: test_fscore
|
|
value: [0. 0.25 0. 0. 0. 0. 0. 0. 0. 0.25]
|
|
|
|
mean value: 0.05
|
|
|
|
key: train_fscore
|
|
value: [0. 0.05128205 0. 0. 0. 0.05128205
|
|
0. 0. 0. 0.15 ]
|
|
|
|
mean value: 0.025256410256410254
|
|
|
|
key: test_precision
|
|
value: [0. 0.25 0. 0. 0. 0.
|
|
0. 0. 0. 0.33333333]
|
|
|
|
mean value: 0.058333333333333334
|
|
|
|
key: train_precision
|
|
value: [0. 1. 0. 0. 0. 1. 0. 0. 0. 1.]
|
|
|
|
mean value: 0.3
|
|
|
|
key: test_recall
|
|
value: [0. 0.25 0. 0. 0. 0. 0. 0. 0. 0.2 ]
|
|
|
|
mean value: 0.045
|
|
|
|
key: train_recall
|
|
value: [0. 0.02631579 0. 0. 0. 0.02631579
|
|
0. 0. 0. 0.08108108]
|
|
|
|
mean value: 0.01337126600284495
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.59166667 0.48888889 0.5 0.47777778 0.48888889
|
|
0.5 0.46666667 0.5 0.57727273]
|
|
|
|
mean value: 0.5091161616161616
|
|
|
|
key: train_roc_auc
|
|
value: [0.5 0.51315789 0.5 0.5 0.5 0.51315789
|
|
0.5 0.5 0.5 0.54054054]
|
|
|
|
mean value: 0.5066856330014224
|
|
|
|
key: test_jcc
|
|
value: [0. 0.14285714 0. 0. 0. 0.
|
|
0. 0. 0. 0.14285714]
|
|
|
|
mean value: 0.02857142857142857
|
|
|
|
key: train_jcc
|
|
value: [0. 0.02631579 0. 0. 0. 0.02631579
|
|
0. 0. 0. 0.08108108]
|
|
|
|
mean value: 0.01337126600284495
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04132891 0.05884194 0.03411841 0.03643966 0.045403 0.05388737
|
|
0.05040932 0.04038811 0.03382921 0.03948689]
|
|
|
|
mean value: 0.04341328144073486
|
|
|
|
key: score_time
|
|
value: [0.02198577 0.02377987 0.02565217 0.02495337 0.02321053 0.02282596
|
|
0.02405143 0.02364349 0.02357221 0.02519584]
|
|
|
|
mean value: 0.023887062072753908
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0. 0.48412292 0. 0. -0.04303315
|
|
0. -0.04303315 0.61193797 0. ]
|
|
|
|
mean value: 0.10099945965785631
|
|
|
|
key: train_mcc
|
|
value: [0.4135851 0.44265133 0.34893035 0.34874058 0.34893035 0.38246502
|
|
0.38246502 0.34874058 0.31613974 0.41965652]
|
|
|
|
mean value: 0.37523045910685154
|
|
|
|
key: test_accuracy
|
|
value: [0.91836735 0.91836735 0.93877551 0.91836735 0.91836735 0.89795918
|
|
0.91836735 0.89795918 0.93877551 0.89795918]
|
|
|
|
mean value: 0.9163265306122449
|
|
|
|
key: train_accuracy
|
|
value: [0.92970522 0.93197279 0.92517007 0.92517007 0.92517007 0.92743764
|
|
0.92743764 0.92517007 0.92517007 0.93197279]
|
|
|
|
mean value: 0.9274376417233561
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0.4 0. 0. 0.
|
|
0. 0. 0.57142857 0. ]
|
|
|
|
mean value: 0.09714285714285716
|
|
|
|
key: train_fscore
|
|
value: [0.31111111 0.34782609 0.26666667 0.23255814 0.26666667 0.27272727
|
|
0.27272727 0.23255814 0.19512195 0.31818182]
|
|
|
|
mean value: 0.27161451253266095
|
|
|
|
key: test_precision
|
|
value: [0. 0. 1. 0. 0. 0. 0. 0. 1. 0.]
|
|
|
|
mean value: 0.2
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.85714286 1. 0.85714286 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9714285714285714
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0.25 0. 0. 0. 0. 0. 0.4 0. ]
|
|
|
|
mean value: 0.065
|
|
|
|
key: train_recall
|
|
value: [0.18421053 0.21052632 0.15789474 0.13157895 0.15789474 0.15789474
|
|
0.15789474 0.13157895 0.10810811 0.18918919]
|
|
|
|
mean value: 0.15867709815078235
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.625 0.5 0.5 0.48888889
|
|
0.5 0.48888889 0.7 0.5 ]
|
|
|
|
mean value: 0.5302777777777777
|
|
|
|
key: train_roc_auc
|
|
value: [0.59210526 0.60526316 0.57770667 0.56578947 0.57770667 0.57894737
|
|
0.57894737 0.56578947 0.55405405 0.59459459]
|
|
|
|
mean value: 0.5790904101175748
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0.25 0. 0. 0. 0. 0. 0.4 0. ]
|
|
|
|
mean value: 0.065
|
|
|
|
key: train_jcc
|
|
value: [0.18421053 0.21052632 0.15384615 0.13157895 0.15384615 0.15789474
|
|
0.15789474 0.13157895 0.10810811 0.18918919]
|
|
|
|
mean value: 0.15786738155159208
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.29513454 0.39091372 0.34530187 1.49989367 0.28753567 0.41064072
|
|
0.33452988 0.37688804 0.39417863 0.32573199]
|
|
|
|
mean value: 0.4660748720169067
|
|
|
|
key: score_time
|
|
value: [0.01405931 0.02363515 0.02309823 0.02271962 0.02732658 0.02449775
|
|
0.02611279 0.02520847 0.02609611 0.02436471]
|
|
|
|
mean value: 0.02371187210083008
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0. 0. 0. 0. -0.04303315
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: -0.004303314829119352
|
|
|
|
key: train_mcc
|
|
value: [0.15525099 0.15525099 0.15525099 0.34874058 0.15525099 0.38246502
|
|
0.15525099 0.15525099 0. 0.41965652]
|
|
|
|
mean value: 0.2082368076377054
|
|
|
|
key: test_accuracy
|
|
value: [0.91836735 0.91836735 0.91836735 0.91836735 0.91836735 0.89795918
|
|
0.91836735 0.91836735 0.89795918 0.89795918]
|
|
|
|
mean value: 0.9122448979591837
|
|
|
|
key: train_accuracy
|
|
value: [0.91609977 0.91609977 0.91609977 0.92517007 0.91609977 0.92743764
|
|
0.91609977 0.91609977 0.91609977 0.93197279]
|
|
|
|
mean value: 0.9197278911564626
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_fscore
|
|
value: [0.05128205 0.05128205 0.05128205 0.23255814 0.05128205 0.27272727
|
|
0.05128205 0.05128205 0. 0.31818182]
|
|
|
|
mean value: 0.11311595381362823
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 0. 1.]
|
|
|
|
mean value: 0.9
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_cd_sl.py:115: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_cd_sl.py:118: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_recall
|
|
value: [0.02631579 0.02631579 0.02631579 0.13157895 0.02631579 0.15789474
|
|
0.02631579 0.02631579 0. 0.18918919]
|
|
|
|
mean value: 0.06365576102418208
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.5 0.5 0.5 0.48888889
|
|
0.5 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.4988888888888889
|
|
|
|
key: train_roc_auc
|
|
value: [0.51315789 0.51315789 0.51315789 0.56578947 0.51315789 0.57894737
|
|
0.51315789 0.51315789 0.5 0.59459459]
|
|
|
|
mean value: 0.5318278805120911
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_jcc
|
|
value: [0.02631579 0.02631579 0.02631579 0.13157895 0.02631579 0.15789474
|
|
0.02631579 0.02631579 0. 0.18918919]
|
|
|
|
mean value: 0.06365576102418208
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0704453 0.04082274 0.07081413 0.07957244 0.20151687 0.10728359
|
|
0.09266996 0.08612514 0.04564619 0.04352832]
|
|
|
|
mean value: 0.08384246826171875
|
|
|
|
key: score_time
|
|
value: [0.01460814 0.014961 0.01320601 0.01928163 0.02911353 0.02112532
|
|
0.02004409 0.01657796 0.01266456 0.01363707]
|
|
|
|
mean value: 0.017521929740905762
|
|
|
|
key: test_mcc
|
|
value: [0.82222222 0.82548988 0.8675239 0.82222222 0.82548988 0.8230355
|
|
0.89341253 0.78272001 0.86853911 0.87294449]
|
|
|
|
mean value: 0.8403599748206882
|
|
|
|
key: train_mcc
|
|
value: [0.86754475 0.87701119 0.89409611 0.8744215 0.87226845 0.87723864
|
|
0.8676974 0.86960618 0.87764683 0.86301846]
|
|
|
|
mean value: 0.8740549504777019
|
|
|
|
key: test_accuracy
|
|
value: [0.91111111 0.91111111 0.93333333 0.91111111 0.91111111 0.91111111
|
|
0.94382022 0.88764045 0.93258427 0.93258427]
|
|
|
|
mean value: 0.9185518102372034
|
|
|
|
key: train_accuracy
|
|
value: [0.93300248 0.93796526 0.94665012 0.93672457 0.93548387 0.93796526
|
|
0.9330855 0.93432466 0.93804213 0.93060719]
|
|
|
|
mean value: 0.9363851042829338
|
|
|
|
key: test_fscore
|
|
value: [0.91111111 0.91489362 0.93478261 0.91111111 0.91489362 0.91304348
|
|
0.94623656 0.89361702 0.93617021 0.9375 ]
|
|
|
|
mean value: 0.9213359336403636
|
|
|
|
key: train_fscore
|
|
value: [0.93493976 0.93946731 0.94775213 0.93818182 0.93719807 0.93961353
|
|
0.93509615 0.93591294 0.93975904 0.93269231]
|
|
|
|
mean value: 0.9380613046150837
|
|
|
|
key: test_precision
|
|
value: [0.91111111 0.87755102 0.91489362 0.91111111 0.87755102 0.89361702
|
|
0.89795918 0.84 0.89795918 0.88235294]
|
|
|
|
mean value: 0.890410620985983
|
|
|
|
key: train_precision
|
|
value: [0.90866511 0.91725768 0.92857143 0.91706161 0.91294118 0.91529412
|
|
0.9088785 0.91489362 0.91334895 0.9044289 ]
|
|
|
|
mean value: 0.9141341094923939
|
|
|
|
key: test_recall
|
|
value: [0.91111111 0.95555556 0.95555556 0.91111111 0.95555556 0.93333333
|
|
1. 0.95454545 0.97777778 1. ]
|
|
|
|
mean value: 0.9554545454545454
|
|
|
|
key: train_recall
|
|
value: [0.96277916 0.96277916 0.96774194 0.96029777 0.96277916 0.96526055
|
|
0.96287129 0.95792079 0.96774194 0.96277916]
|
|
|
|
mean value: 0.9632950888140923
|
|
|
|
key: test_roc_auc
|
|
value: [0.91111111 0.91111111 0.93333333 0.91111111 0.91111111 0.91111111
|
|
0.94444444 0.88838384 0.93207071 0.93181818]
|
|
|
|
mean value: 0.9185606060606061
|
|
|
|
key: train_roc_auc
|
|
value: [0.93300248 0.93796526 0.94665012 0.93672457 0.93548387 0.93796526
|
|
0.93304855 0.93429538 0.93807889 0.930647 ]
|
|
|
|
mean value: 0.9363861386138614
|
|
|
|
key: test_jcc
|
|
value: [0.83673469 0.84313725 0.87755102 0.83673469 0.84313725 0.84
|
|
0.89795918 0.80769231 0.88 0.88235294]
|
|
|
|
mean value: 0.8545299350509434
|
|
|
|
key: train_jcc
|
|
value: [0.87782805 0.88584475 0.90069284 0.88356164 0.88181818 0.88610478
|
|
0.87810384 0.87954545 0.88636364 0.87387387]
|
|
|
|
mean value: 0.8833737055311376
|
|
|
|
MCC on Blind test: -0.11
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.12755537 3.04009175 3.51895547 3.04520917 3.45551896 2.60019684
|
|
2.039186 1.78104663 2.35084653 2.2768805 ]
|
|
|
|
mean value: 2.523548722267151
|
|
|
|
key: score_time
|
|
value: [0.01369476 0.01410985 0.03243065 0.01931524 0.02969885 0.01097441
|
|
0.02222657 0.01077056 0.0183413 0.01656532]
|
|
|
|
mean value: 0.01881275177001953
|
|
|
|
key: test_mcc
|
|
value: [0.84632727 0.84970583 0.84632727 0.8230355 0.84970583 0.84632727
|
|
0.91106631 0.84823428 0.91097728 0.85305908]
|
|
|
|
mean value: 0.8584765902913816
|
|
|
|
key: train_mcc
|
|
value: [0.95288 0.95041903 0.95299737 0.93566825 0.92852293 0.94791708
|
|
0.95539027 0.97027228 0.90141422 0.9480289 ]
|
|
|
|
mean value: 0.9443510318338347
|
|
|
|
key: test_accuracy
|
|
value: [0.92222222 0.92222222 0.92222222 0.91111111 0.92222222 0.92222222
|
|
0.95505618 0.92134831 0.95505618 0.92134831]
|
|
|
|
mean value: 0.9275031210986268
|
|
|
|
key: train_accuracy
|
|
value: [0.9764268 0.9751861 0.9764268 0.96774194 0.96401985 0.97394541
|
|
0.97769517 0.98513011 0.95043371 0.9739777 ]
|
|
|
|
mean value: 0.9720983577321268
|
|
|
|
key: test_fscore
|
|
value: [0.92473118 0.92631579 0.92473118 0.91304348 0.92631579 0.92473118
|
|
0.95555556 0.92473118 0.95652174 0.92783505]
|
|
|
|
mean value: 0.9304512134623416
|
|
|
|
key: train_fscore
|
|
value: [0.97651422 0.97530864 0.97662977 0.96805897 0.96459096 0.97404203
|
|
0.97772277 0.98511166 0.95121951 0.97410604]
|
|
|
|
mean value: 0.9723304572124235
|
|
|
|
key: test_precision
|
|
value: [0.89583333 0.88 0.89583333 0.89361702 0.88 0.89583333
|
|
0.93478261 0.87755102 0.93617021 0.86538462]
|
|
|
|
mean value: 0.8955005478530984
|
|
|
|
key: train_precision
|
|
value: [0.9729064 0.97051597 0.96829268 0.95863747 0.94951923 0.97044335
|
|
0.97772277 0.98756219 0.9352518 0.96813725]
|
|
|
|
mean value: 0.9658989122288052
|
|
|
|
key: test_recall
|
|
value: [0.95555556 0.97777778 0.95555556 0.93333333 0.97777778 0.95555556
|
|
0.97727273 0.97727273 0.97777778 1. ]
|
|
|
|
mean value: 0.9687878787878788
|
|
|
|
key: train_recall
|
|
value: [0.98014888 0.98014888 0.98511166 0.97766749 0.98014888 0.97766749
|
|
0.97772277 0.98267327 0.96774194 0.98014888]
|
|
|
|
mean value: 0.978918015871066
|
|
|
|
key: test_roc_auc
|
|
value: [0.92222222 0.92222222 0.92222222 0.91111111 0.92222222 0.92222222
|
|
0.95530303 0.9219697 0.95479798 0.92045455]
|
|
|
|
mean value: 0.9274747474747476
|
|
|
|
key: train_roc_auc
|
|
value: [0.9764268 0.9751861 0.9764268 0.96774194 0.96401985 0.97394541
|
|
0.97769513 0.98513316 0.95045513 0.97398533]
|
|
|
|
mean value: 0.9721015649952093
|
|
|
|
key: test_jcc
|
|
value: [0.86 0.8627451 0.86 0.84 0.8627451 0.86
|
|
0.91489362 0.86 0.91666667 0.86538462]
|
|
|
|
mean value: 0.870243509515099
|
|
|
|
key: train_jcc
|
|
value: [0.95410628 0.95180723 0.95432692 0.93809524 0.93160377 0.94939759
|
|
0.95641646 0.97066015 0.90697674 0.94951923]
|
|
|
|
mean value: 0.9462909620772997
|
|
|
|
MCC on Blind test: -0.11
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01563478 0.01409817 0.01399231 0.01384282 0.01369882 0.01375413
|
|
0.01398158 0.01445293 0.01388216 0.01379848]
|
|
|
|
mean value: 0.014113616943359376
|
|
|
|
key: score_time
|
|
value: [0.01122069 0.01092529 0.01084709 0.01096654 0.01085734 0.01080751
|
|
0.01078987 0.01077294 0.01086521 0.0107975 ]
|
|
|
|
mean value: 0.010885000228881836
|
|
|
|
key: test_mcc
|
|
value: [0.27216553 0.25720881 0.41367015 0.41251432 0.44426975 0.49029034
|
|
0.55476755 0.45699664 0.38204659 0.44318095]
|
|
|
|
mean value: 0.41271106253423584
|
|
|
|
key: train_mcc
|
|
value: [0.42380872 0.42074536 0.40873114 0.42297858 0.42521082 0.41769135
|
|
0.40166058 0.41758675 0.43507406 0.41420966]
|
|
|
|
mean value: 0.41876970165431693
|
|
|
|
key: test_accuracy
|
|
value: [0.63333333 0.62222222 0.67777778 0.7 0.71111111 0.72222222
|
|
0.75280899 0.70786517 0.6741573 0.70786517]
|
|
|
|
mean value: 0.690936329588015
|
|
|
|
key: train_accuracy
|
|
value: [0.69851117 0.69727047 0.691067 0.69851117 0.69975186 0.69602978
|
|
0.68773234 0.69516729 0.70508055 0.69392813]
|
|
|
|
mean value: 0.6963049741560354
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.67307692 0.74336283 0.73267327 0.75 0.7706422
|
|
0.79245283 0.75471698 0.73394495 0.75471698]
|
|
|
|
mean value: 0.7372253637344862
|
|
|
|
key: train_fscore
|
|
value: [0.74340021 0.74207188 0.73761855 0.74285714 0.74364407 0.74074074
|
|
0.73529412 0.74159664 0.74680851 0.73917635]
|
|
|
|
mean value: 0.7413208203329961
|
|
|
|
key: test_precision
|
|
value: [0.61111111 0.59322034 0.61764706 0.66071429 0.66101695 0.65625
|
|
0.67741935 0.64516129 0.625 0.6557377 ]
|
|
|
|
mean value: 0.6403278093863842
|
|
|
|
key: train_precision
|
|
value: [0.64705882 0.64640884 0.64102564 0.64760148 0.64879852 0.64575646
|
|
0.63868613 0.64416058 0.65363128 0.64338235]
|
|
|
|
mean value: 0.6456510112356171
|
|
|
|
key: test_recall
|
|
value: [0.73333333 0.77777778 0.93333333 0.82222222 0.86666667 0.93333333
|
|
0.95454545 0.90909091 0.88888889 0.88888889]
|
|
|
|
mean value: 0.8708080808080808
|
|
|
|
key: train_recall
|
|
value: [0.87344913 0.87096774 0.86848635 0.87096774 0.87096774 0.86848635
|
|
0.86633663 0.87376238 0.87096774 0.86848635]
|
|
|
|
mean value: 0.8702878166228534
|
|
|
|
key: test_roc_auc
|
|
value: [0.63333333 0.62222222 0.67777778 0.7 0.71111111 0.72222222
|
|
0.75505051 0.71010101 0.67171717 0.70580808]
|
|
|
|
mean value: 0.6909343434343435
|
|
|
|
key: train_roc_auc
|
|
value: [0.69851117 0.69727047 0.691067 0.69851117 0.69975186 0.69602978
|
|
0.68751075 0.6949457 0.70528585 0.69414417]
|
|
|
|
mean value: 0.6963027909490701
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.50724638 0.5915493 0.578125 0.6 0.62686567
|
|
0.65625 0.60606061 0.57971014 0.60606061]
|
|
|
|
mean value: 0.5851867701276782
|
|
|
|
key: train_jcc
|
|
value: [0.59159664 0.58991597 0.58430718 0.59090909 0.59190556 0.58823529
|
|
0.58139535 0.58931553 0.5959253 0.58626466]
|
|
|
|
mean value: 0.5889770562067759
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01443005 0.02389669 0.0236268 0.02873921 0.01673555 0.01703787
|
|
0.02374792 0.0194304 0.01722026 0.01673079]
|
|
|
|
mean value: 0.02015955448150635
|
|
|
|
key: score_time
|
|
value: [0.01625156 0.01735926 0.0173111 0.01249051 0.01250362 0.01256967
|
|
0.01254129 0.01268768 0.01257706 0.01255965]
|
|
|
|
mean value: 0.013885140419006348
|
|
|
|
key: test_mcc
|
|
value: [0.47087096 0.39197153 0.55766794 0.55776344 0.56454844 0.68041382
|
|
0.71254497 0.52643638 0.58205921 0.62604908]
|
|
|
|
mean value: 0.5670325762236224
|
|
|
|
key: train_mcc
|
|
value: [0.60491348 0.61205374 0.61435952 0.60477094 0.58876121 0.59314421
|
|
0.57745207 0.59203553 0.57918623 0.59086772]
|
|
|
|
mean value: 0.5957544643477605
|
|
|
|
key: test_accuracy
|
|
value: [0.73333333 0.68888889 0.75555556 0.77777778 0.77777778 0.83333333
|
|
0.84269663 0.75280899 0.76404494 0.79775281]
|
|
|
|
mean value: 0.7723970037453183
|
|
|
|
key: train_accuracy
|
|
value: [0.79280397 0.79776675 0.79776675 0.79404467 0.7853598 0.78908189
|
|
0.78066914 0.78810409 0.7819083 0.78810409]
|
|
|
|
mean value: 0.7895609447114423
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.7254902 0.7962963 0.78723404 0.79591837 0.84848485
|
|
0.86 0.78 0.80733945 0.82692308]
|
|
|
|
mean value: 0.7977686277224068
|
|
|
|
key: train_fscore
|
|
value: [0.81587652 0.81868743 0.8198895 0.81555556 0.80883978 0.81026786
|
|
0.80398671 0.81021088 0.80400891 0.80893855]
|
|
|
|
mean value: 0.8116261684870456
|
|
|
|
key: test_precision
|
|
value: [0.70588235 0.64912281 0.68253968 0.75510204 0.73584906 0.77777778
|
|
0.76785714 0.69642857 0.6875 0.72881356]
|
|
|
|
mean value: 0.7186872991304029
|
|
|
|
key: train_precision
|
|
value: [0.73412698 0.74193548 0.73904382 0.73843058 0.72908367 0.73630832
|
|
0.72745491 0.73440644 0.72929293 0.73577236]
|
|
|
|
mean value: 0.7345855493436755
|
|
|
|
key: test_recall
|
|
value: [0.8 0.82222222 0.95555556 0.82222222 0.86666667 0.93333333
|
|
0.97727273 0.88636364 0.97777778 0.95555556]
|
|
|
|
mean value: 0.8996969696969697
|
|
|
|
key: train_recall
|
|
value: [0.91811414 0.91315136 0.92059553 0.91066998 0.90818859 0.90074442
|
|
0.89851485 0.90346535 0.89578164 0.89826303]
|
|
|
|
mean value: 0.9067488882883326
|
|
|
|
key: test_roc_auc
|
|
value: [0.73333333 0.68888889 0.75555556 0.77777778 0.77777778 0.83333333
|
|
0.84419192 0.75429293 0.76161616 0.7959596 ]
|
|
|
|
mean value: 0.7722727272727273
|
|
|
|
key: train_roc_auc
|
|
value: [0.79280397 0.79776675 0.79776675 0.79404467 0.7853598 0.78908189
|
|
0.78052293 0.78796096 0.78204923 0.78824042]
|
|
|
|
mean value: 0.7895597376114782
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.56923077 0.66153846 0.64912281 0.66101695 0.73684211
|
|
0.75438596 0.63934426 0.67692308 0.70491803]
|
|
|
|
mean value: 0.66533224291198
|
|
|
|
key: train_jcc
|
|
value: [0.68901304 0.69303202 0.69475655 0.68855535 0.67903525 0.68105066
|
|
0.67222222 0.68097015 0.67225326 0.67917448]
|
|
|
|
mean value: 0.6830062973344871
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01539993 0.01311874 0.01339102 0.01249838 0.01292109 0.01281643
|
|
0.01273561 0.01300836 0.01299977 0.01288533]
|
|
|
|
mean value: 0.01317746639251709
|
|
|
|
key: score_time
|
|
value: [0.04188466 0.02428293 0.02812815 0.02277112 0.02524447 0.02512622
|
|
0.02565312 0.03656054 0.02717996 0.02539372]
|
|
|
|
mean value: 0.028222489356994628
|
|
|
|
key: test_mcc
|
|
value: [0.62988978 0.66865732 0.72577474 0.82962978 0.64993368 0.70655557
|
|
0.85354573 0.80305603 0.75773523 0.70262451]
|
|
|
|
mean value: 0.7327402363171123
|
|
|
|
key: train_mcc
|
|
value: [0.82659273 0.839979 0.82881419 0.81379037 0.83049845 0.83326852
|
|
0.8234439 0.82567836 0.82472117 0.83349354]
|
|
|
|
mean value: 0.8280280233521584
|
|
|
|
key: test_accuracy
|
|
value: [0.81111111 0.82222222 0.85555556 0.91111111 0.81111111 0.84444444
|
|
0.92134831 0.8988764 0.86516854 0.84269663]
|
|
|
|
mean value: 0.8583645443196005
|
|
|
|
key: train_accuracy
|
|
value: [0.90942928 0.91687345 0.91066998 0.90322581 0.91191067 0.91315136
|
|
0.90830235 0.90954151 0.90954151 0.91325898]
|
|
|
|
mean value: 0.910590490773966
|
|
|
|
key: test_fscore
|
|
value: [0.82474227 0.84313725 0.86868687 0.91666667 0.83495146 0.86
|
|
0.92631579 0.90322581 0.88235294 0.86 ]
|
|
|
|
mean value: 0.8720079051709181
|
|
|
|
key: train_fscore
|
|
value: [0.91521487 0.92163743 0.91627907 0.90930233 0.91715286 0.91841492
|
|
0.91395349 0.91501746 0.9144197 0.91841492]
|
|
|
|
mean value: 0.9159807030054276
|
|
|
|
key: test_precision
|
|
value: [0.76923077 0.75438596 0.7962963 0.8627451 0.74137931 0.78181818
|
|
0.8627451 0.85714286 0.78947368 0.78181818]
|
|
|
|
mean value: 0.7997035441852353
|
|
|
|
key: train_precision
|
|
value: [0.86026201 0.87168142 0.86214442 0.85557987 0.86563877 0.86593407
|
|
0.86184211 0.86373626 0.86666667 0.86593407]
|
|
|
|
mean value: 0.8639419647557135
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.95555556 0.95555556 0.97777778 0.95555556 0.95555556
|
|
1. 0.95454545 1. 0.95555556]
|
|
|
|
mean value: 0.9598989898989899
|
|
|
|
key: train_recall
|
|
value: [0.97766749 0.97766749 0.97766749 0.97022333 0.9751861 0.97766749
|
|
0.97277228 0.97277228 0.96774194 0.97766749]
|
|
|
|
mean value: 0.9747033388202344
|
|
|
|
key: test_roc_auc
|
|
value: [0.81111111 0.82222222 0.85555556 0.91111111 0.81111111 0.84444444
|
|
0.92222222 0.89949495 0.86363636 0.84141414]
|
|
|
|
mean value: 0.8582323232323232
|
|
|
|
key: train_roc_auc
|
|
value: [0.90942928 0.91687345 0.91066998 0.90322581 0.91191067 0.91315136
|
|
0.90822237 0.90946306 0.90961354 0.9133387 ]
|
|
|
|
mean value: 0.9105898213890868
|
|
|
|
key: test_jcc
|
|
value: [0.70175439 0.72881356 0.76785714 0.84615385 0.71666667 0.75438596
|
|
0.8627451 0.82352941 0.78947368 0.75438596]
|
|
|
|
mean value: 0.7745765724803612
|
|
|
|
key: train_jcc
|
|
value: [0.84368308 0.85466377 0.84549356 0.8336887 0.84698276 0.84913793
|
|
0.84154176 0.84334764 0.84233261 0.84913793]
|
|
|
|
mean value: 0.8450009748961562
|
|
|
|
MCC on Blind test: -0.12
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04898024 0.04735947 0.04700255 0.0482986 0.04801893 0.04737425
|
|
0.04802537 0.04696679 0.04770064 0.0470624 ]
|
|
|
|
mean value: 0.04767892360687256
|
|
|
|
key: score_time
|
|
value: [0.02052712 0.01983166 0.02064133 0.021106 0.02047706 0.01998162
|
|
0.02032757 0.02039409 0.02001405 0.01892757]
|
|
|
|
mean value: 0.020222806930541994
|
|
|
|
key: test_mcc
|
|
value: [0.89087081 0.71987403 0.84465303 0.8675239 0.77854709 0.84465303
|
|
0.86879834 0.79969743 0.88956845 0.79939579]
|
|
|
|
mean value: 0.8303581895265356
|
|
|
|
key: train_mcc
|
|
value: [0.90821375 0.89826579 0.90821375 0.90821375 0.91068119 0.893325
|
|
0.91079227 0.89839139 0.89841284 0.89343537]
|
|
|
|
mean value: 0.9027945098519337
|
|
|
|
key: test_accuracy
|
|
value: [0.94444444 0.85555556 0.92222222 0.93333333 0.88888889 0.92222222
|
|
0.93258427 0.8988764 0.94382022 0.8988764 ]
|
|
|
|
mean value: 0.9140823970037453
|
|
|
|
key: train_accuracy
|
|
value: [0.95409429 0.94913151 0.95409429 0.95409429 0.95533499 0.94665012
|
|
0.95539033 0.94919455 0.94919455 0.94671623]
|
|
|
|
mean value: 0.9513895166671279
|
|
|
|
key: test_fscore
|
|
value: [0.94252874 0.86597938 0.92134831 0.93181818 0.89130435 0.92307692
|
|
0.93478261 0.9010989 0.94623656 0.90322581]
|
|
|
|
mean value: 0.9161399759789368
|
|
|
|
key: train_fscore
|
|
value: [0.95426452 0.94919455 0.95426452 0.95392279 0.95522388 0.94684796
|
|
0.95533499 0.94932015 0.94894147 0.94671623]
|
|
|
|
mean value: 0.9514031064871785
|
|
|
|
key: test_precision
|
|
value: [0.97619048 0.80769231 0.93181818 0.95348837 0.87234043 0.91304348
|
|
0.89583333 0.87234043 0.91666667 0.875 ]
|
|
|
|
mean value: 0.9014413667118688
|
|
|
|
key: train_precision
|
|
value: [0.95073892 0.9480198 0.95073892 0.9575 0.95760599 0.94334975
|
|
0.95771144 0.94814815 0.9525 0.94554455]
|
|
|
|
mean value: 0.9511857518614164
|
|
|
|
key: test_recall
|
|
value: [0.91111111 0.93333333 0.91111111 0.91111111 0.91111111 0.93333333
|
|
0.97727273 0.93181818 0.97777778 0.93333333]
|
|
|
|
mean value: 0.9331313131313131
|
|
|
|
key: train_recall
|
|
value: [0.95781638 0.95037221 0.95781638 0.95037221 0.9528536 0.95037221
|
|
0.9529703 0.95049505 0.94540943 0.94789082]
|
|
|
|
mean value: 0.9516368572341105
|
|
|
|
key: test_roc_auc
|
|
value: [0.94444444 0.85555556 0.92222222 0.93333333 0.88888889 0.92222222
|
|
0.93308081 0.89924242 0.94343434 0.89848485]
|
|
|
|
mean value: 0.9140909090909091
|
|
|
|
key: train_roc_auc
|
|
value: [0.95409429 0.94913151 0.95409429 0.95409429 0.95533499 0.94665012
|
|
0.95539334 0.94919293 0.94918986 0.94671769]
|
|
|
|
mean value: 0.9513893324816353
|
|
|
|
key: test_jcc
|
|
value: [0.89130435 0.76363636 0.85416667 0.87234043 0.80392157 0.85714286
|
|
0.87755102 0.82 0.89795918 0.82352941]
|
|
|
|
mean value: 0.8461551845277678
|
|
|
|
key: train_jcc
|
|
value: [0.91252955 0.90330189 0.91252955 0.91190476 0.91428571 0.89906103
|
|
0.91448931 0.90352941 0.9028436 0.89882353]
|
|
|
|
mean value: 0.9073298351737726
|
|
|
|
MCC on Blind test: -0.05
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [6.17382598 3.52771473 5.99718857 4.37633634 4.23146558 4.23878574
|
|
4.74109864 4.05082345 4.07225037 6.22766924]
|
|
|
|
mean value: 4.763715863227844
|
|
|
|
key: score_time
|
|
value: [0.01211262 0.01379871 0.0151577 0.0115006 0.01392126 0.01218224
|
|
0.01741958 0.01780963 0.02591228 0.02118683]
|
|
|
|
mean value: 0.016100144386291503
|
|
|
|
key: test_mcc
|
|
value: [0.80498447 0.71987403 0.84970583 0.87011096 0.78478493 0.91111111
|
|
0.9347507 0.79969743 0.91371736 0.82751054]
|
|
|
|
mean value: 0.8416247354787947
|
|
|
|
key: train_mcc
|
|
value: [0.97548647 0.89632266 0.93900945 0.91124882 0.91520771 0.95041903
|
|
0.97276513 0.93806739 0.94062545 0.9753251 ]
|
|
|
|
mean value: 0.9414477197372322
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.85555556 0.92222222 0.93333333 0.88888889 0.95555556
|
|
0.96629213 0.8988764 0.95505618 0.91011236]
|
|
|
|
mean value: 0.9185892634207241
|
|
|
|
key: train_accuracy
|
|
value: [0.98759305 0.94789082 0.96898263 0.95409429 0.95657568 0.9751861
|
|
0.98636927 0.96902107 0.97026022 0.98760843]
|
|
|
|
mean value: 0.9703581564536116
|
|
|
|
key: test_fscore
|
|
value: [0.89411765 0.86597938 0.92631579 0.93023256 0.89583333 0.95555556
|
|
0.96703297 0.9010989 0.95744681 0.91666667]
|
|
|
|
mean value: 0.9210279608313403
|
|
|
|
key: train_fscore
|
|
value: [0.98743719 0.94878049 0.96969697 0.95213454 0.95798319 0.97506234
|
|
0.9864365 0.96917386 0.97044335 0.98768473]
|
|
|
|
mean value: 0.9704833157999746
|
|
|
|
key: test_precision
|
|
value: [0.95 0.80769231 0.88 0.97560976 0.84313725 0.95555556
|
|
0.93617021 0.87234043 0.91836735 0.8627451 ]
|
|
|
|
mean value: 0.9001617957523249
|
|
|
|
key: train_precision
|
|
value: [1. 0.93285372 0.9478673 0.99459459 0.92790698 0.97994987
|
|
0.98280098 0.96560197 0.96332518 0.9804401 ]
|
|
|
|
mean value: 0.9675340691206618
|
|
|
|
key: test_recall
|
|
value: [0.84444444 0.93333333 0.97777778 0.88888889 0.95555556 0.95555556
|
|
1. 0.93181818 1. 0.97777778]
|
|
|
|
mean value: 0.9465151515151515
|
|
|
|
key: train_recall
|
|
value: [0.9751861 0.96526055 0.99255583 0.91315136 0.99007444 0.97022333
|
|
0.99009901 0.97277228 0.97766749 0.99503722]
|
|
|
|
mean value: 0.9742027614672137
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.85555556 0.92222222 0.93333333 0.88888889 0.95555556
|
|
0.96666667 0.89924242 0.95454545 0.90934343]
|
|
|
|
mean value: 0.9185353535353535
|
|
|
|
key: train_roc_auc
|
|
value: [0.98759305 0.94789082 0.96898263 0.95409429 0.95657568 0.9751861
|
|
0.98636464 0.96901641 0.97026939 0.98761762]
|
|
|
|
mean value: 0.9703590644424244
|
|
|
|
key: test_jcc
|
|
value: [0.80851064 0.76363636 0.8627451 0.86956522 0.81132075 0.91489362
|
|
0.93617021 0.82 0.91836735 0.84615385]
|
|
|
|
mean value: 0.8551363094961593
|
|
|
|
key: train_jcc
|
|
value: [0.9751861 0.9025522 0.94117647 0.90864198 0.91935484 0.9513382
|
|
0.97323601 0.94019139 0.94258373 0.9756691 ]
|
|
|
|
mean value: 0.9429930021620909
|
|
|
|
MCC on Blind test: -0.07
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09120798 0.08327627 0.06607366 0.08031511 0.08555174 0.07450747
|
|
0.08098578 0.07998705 0.09805489 0.12527156]
|
|
|
|
mean value: 0.08652315139770508
|
|
|
|
key: score_time
|
|
value: [0.01105142 0.01083446 0.01094079 0.01092219 0.01089859 0.01094055
|
|
0.01000452 0.01257205 0.01276493 0.02420831]
|
|
|
|
mean value: 0.01251378059387207
|
|
|
|
key: test_mcc
|
|
value: [0.87011096 0.84465303 0.76026311 0.8675239 0.88910845 0.8675239
|
|
0.82112188 0.7979798 1. 1. ]
|
|
|
|
mean value: 0.8718285036250217
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.93333333 0.92222222 0.87777778 0.93333333 0.94444444 0.93333333
|
|
0.91011236 0.8988764 1. 1. ]
|
|
|
|
mean value: 0.9353433208489388
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93617021 0.92134831 0.88421053 0.93181818 0.94505495 0.93478261
|
|
0.91111111 0.8988764 1. 1. ]
|
|
|
|
mean value: 0.936337230486276
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.89795918 0.93181818 0.84 0.95348837 0.93478261 0.91489362
|
|
0.89130435 0.88888889 1. 1. ]
|
|
|
|
mean value: 0.9253135200016579
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.97777778 0.91111111 0.93333333 0.91111111 0.95555556 0.95555556
|
|
0.93181818 0.90909091 1. 1. ]
|
|
|
|
mean value: 0.9485353535353536
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.93333333 0.92222222 0.87777778 0.93333333 0.94444444 0.93333333
|
|
0.91035354 0.8989899 1. 1. ]
|
|
|
|
mean value: 0.9353787878787879
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.88 0.85416667 0.79245283 0.87234043 0.89583333 0.87755102
|
|
0.83673469 0.81632653 1. 1. ]
|
|
|
|
mean value: 0.8825405500618554
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.22078204 0.24058151 0.29043341 0.19268107 0.21204948 0.20862389
|
|
0.21357036 0.23054552 0.20948625 0.20506454]
|
|
|
|
mean value: 0.2223818063735962
|
|
|
|
key: score_time
|
|
value: [0.02519941 0.0249579 0.0433073 0.02523708 0.02521873 0.02507925
|
|
0.02501249 0.03179884 0.03351307 0.0249033 ]
|
|
|
|
mean value: 0.028422737121582033
|
|
|
|
key: test_mcc
|
|
value: [0.91473203 0.86666667 0.91473203 0.95650071 0.91201231 0.8675239
|
|
0.97776667 0.88956845 1. 0.95603853]
|
|
|
|
mean value: 0.9255541306764525
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.95555556 0.93333333 0.95555556 0.97777778 0.95555556 0.93333333
|
|
0.98876404 0.94382022 1. 0.97752809]
|
|
|
|
mean value: 0.9621223470661673
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95348837 0.93333333 0.95348837 0.97826087 0.95454545 0.93181818
|
|
0.98850575 0.94117647 1. 0.97727273]
|
|
|
|
mean value: 0.9611889528435633
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.93333333 1. 0.95744681 0.97674419 0.95348837
|
|
1. 0.97560976 1. 1. ]
|
|
|
|
mean value: 0.9796622456081068
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.91111111 0.93333333 0.91111111 1. 0.93333333 0.91111111
|
|
0.97727273 0.90909091 1. 0.95555556]
|
|
|
|
mean value: 0.9441919191919192
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95555556 0.93333333 0.95555556 0.97777778 0.95555556 0.93333333
|
|
0.98863636 0.94343434 1. 0.97777778]
|
|
|
|
mean value: 0.9620959595959596
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.91111111 0.875 0.91111111 0.95744681 0.91304348 0.87234043
|
|
0.97727273 0.88888889 1. 0.95555556]
|
|
|
|
mean value: 0.9261770106242817
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.05
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02442193 0.01200891 0.01171684 0.0114646 0.01139045 0.01143217
|
|
0.01167321 0.01173472 0.01180768 0.01175117]
|
|
|
|
mean value: 0.012940168380737305
|
|
|
|
key: score_time
|
|
value: [0.0323236 0.00994706 0.00917459 0.00891662 0.00896883 0.00888515
|
|
0.00901246 0.00906873 0.00906968 0.00912428]
|
|
|
|
mean value: 0.011449098587036133
|
|
|
|
key: test_mcc
|
|
value: [0.73624773 0.8230355 0.80178373 0.75724019 0.73333333 0.8001976
|
|
0.69213213 0.73090625 0.79939579 0.75727397]
|
|
|
|
mean value: 0.7631546223410746
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.86666667 0.91111111 0.9 0.87777778 0.86666667 0.9
|
|
0.84269663 0.86516854 0.8988764 0.87640449]
|
|
|
|
mean value: 0.8805368289637953
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.86046512 0.91304348 0.90322581 0.88172043 0.86666667 0.9010989
|
|
0.85106383 0.86046512 0.90322581 0.88421053]
|
|
|
|
mean value: 0.8825185677698353
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.90243902 0.89361702 0.875 0.85416667 0.86666667 0.89130435
|
|
0.8 0.88095238 0.875 0.84 ]
|
|
|
|
mean value: 0.8679146107778641
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.82222222 0.93333333 0.93333333 0.91111111 0.86666667 0.91111111
|
|
0.90909091 0.84090909 0.93333333 0.93333333]
|
|
|
|
mean value: 0.8994444444444445
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86666667 0.91111111 0.9 0.87777778 0.86666667 0.9
|
|
0.84343434 0.86489899 0.89848485 0.87575758]
|
|
|
|
mean value: 0.8804797979797979
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75510204 0.84 0.82352941 0.78846154 0.76470588 0.82
|
|
0.74074074 0.75510204 0.82352941 0.79245283]
|
|
|
|
mean value: 0.7903623896905965
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.07
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [3.06794262 3.23345065 4.95417857 5.76033401 3.63348937 3.11895299
|
|
3.15756273 3.12771749 3.07702494 5.82279658]
|
|
|
|
mean value: 3.8953449964523315
|
|
|
|
key: score_time
|
|
value: [0.15030789 0.1020515 0.23300815 0.11171794 0.10320067 0.10296416
|
|
0.10368848 0.1033504 0.10362291 0.26621032]
|
|
|
|
mean value: 0.13801224231719972
|
|
|
|
key: test_mcc
|
|
value: [0.89442719 0.91201231 0.91201231 1. 0.95650071 0.91111111
|
|
0.93282828 0.86595952 0.97776667 0.97777778]
|
|
|
|
mean value: 0.9340395889452306
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.94444444 0.95555556 0.95555556 1. 0.97777778 0.95555556
|
|
0.96629213 0.93258427 0.98876404 0.98876404]
|
|
|
|
mean value: 0.9665293383270911
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.94117647 0.95454545 0.95454545 1. 0.97727273 0.95555556
|
|
0.96629213 0.93023256 0.98901099 0.98876404]
|
|
|
|
mean value: 0.9657395389433232
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.97674419 0.97674419 1. 1. 0.95555556
|
|
0.95555556 0.95238095 0.97826087 1. ]
|
|
|
|
mean value: 0.9795241305150304
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.88888889 0.93333333 0.93333333 1. 0.95555556 0.95555556
|
|
0.97727273 0.90909091 1. 0.97777778]
|
|
|
|
mean value: 0.9530808080808081
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.94444444 0.95555556 0.95555556 1. 0.97777778 0.95555556
|
|
0.96641414 0.93232323 0.98863636 0.98888889]
|
|
|
|
mean value: 0.9665151515151515
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.88888889 0.91304348 0.91304348 1. 0.95555556 0.91489362
|
|
0.93478261 0.86956522 0.97826087 0.97777778]
|
|
|
|
mean value: 0.9345811491417412
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.11139679 1.15432334 1.2257669 1.14564729 1.129673 1.15977216
|
|
1.22464919 1.20556092 1.14009881 1.19124556]
|
|
|
|
mean value: 1.1688133955001831
|
|
|
|
key: score_time
|
|
value: [0.28581953 0.27785778 0.27359939 0.22735381 0.2859149 0.27192855
|
|
0.28812504 0.30813289 0.26070023 0.14875078]
|
|
|
|
mean value: 0.2628182888031006
|
|
|
|
key: test_mcc
|
|
value: [0.82548988 0.86666667 0.93356387 0.91201231 0.93541435 0.8675239
|
|
0.97777778 0.86595952 0.95505051 0.95603853]
|
|
|
|
mean value: 0.9095497310360912
|
|
|
|
key: train_mcc
|
|
value: [0.9751861 0.97285148 0.97781501 0.97033087 0.97285148 0.97781501
|
|
0.97777055 0.97784276 0.9753242 0.97281277]
|
|
|
|
mean value: 0.9750600246614846
|
|
|
|
key: test_accuracy
|
|
value: [0.91111111 0.93333333 0.96666667 0.95555556 0.96666667 0.93333333
|
|
0.98876404 0.93258427 0.97752809 0.97752809]
|
|
|
|
mean value: 0.9543071161048688
|
|
|
|
key: train_accuracy
|
|
value: [0.98759305 0.98635236 0.98883375 0.98511166 0.98635236 0.98883375
|
|
0.98884758 0.98884758 0.98760843 0.98636927]
|
|
|
|
mean value: 0.9874749785530454
|
|
|
|
key: test_fscore
|
|
value: [0.90697674 0.93333333 0.96703297 0.95454545 0.96551724 0.93181818
|
|
0.98876404 0.93023256 0.97777778 0.97727273]
|
|
|
|
mean value: 0.9533271030429153
|
|
|
|
key: train_fscore
|
|
value: [0.98759305 0.98623279 0.98873592 0.985 0.98623279 0.98873592
|
|
0.98879203 0.98876404 0.9875 0.98626717]
|
|
|
|
mean value: 0.987385371476059
|
|
|
|
key: test_precision
|
|
value: [0.95121951 0.93333333 0.95652174 0.97674419 1. 0.95348837
|
|
0.97777778 0.95238095 0.97777778 1. ]
|
|
|
|
mean value: 0.9679243650734932
|
|
|
|
key: train_precision
|
|
value: [0.98759305 0.99494949 0.99747475 0.99244332 0.99494949 0.99747475
|
|
0.99498747 0.99748111 0.99496222 0.99246231]
|
|
|
|
mean value: 0.9944777967061189
|
|
|
|
key: test_recall
|
|
value: [0.86666667 0.93333333 0.97777778 0.93333333 0.93333333 0.91111111
|
|
1. 0.90909091 0.97777778 0.95555556]
|
|
|
|
mean value: 0.9397979797979799
|
|
|
|
key: train_recall
|
|
value: [0.98759305 0.97766749 0.98014888 0.97766749 0.97766749 0.98014888
|
|
0.98267327 0.98019802 0.98014888 0.98014888]
|
|
|
|
mean value: 0.9804062354126232
|
|
|
|
key: test_roc_auc
|
|
value: [0.91111111 0.93333333 0.96666667 0.95555556 0.96666667 0.93333333
|
|
0.98888889 0.93232323 0.97752525 0.97777778]
|
|
|
|
mean value: 0.9543181818181818
|
|
|
|
key: train_roc_auc
|
|
value: [0.98759305 0.98635236 0.98883375 0.98511166 0.98635236 0.98883375
|
|
0.98885524 0.98885832 0.98759919 0.98636157]
|
|
|
|
mean value: 0.9874751246836843
|
|
|
|
key: test_jcc
|
|
value: [0.82978723 0.875 0.93617021 0.91304348 0.93333333 0.87234043
|
|
0.97777778 0.86956522 0.95652174 0.95555556]
|
|
|
|
mean value: 0.9119094973789701
|
|
|
|
key: train_jcc
|
|
value: [0.9754902 0.97283951 0.97772277 0.97044335 0.97283951 0.97772277
|
|
0.97783251 0.97777778 0.97530864 0.9729064 ]
|
|
|
|
mean value: 0.9750883438741504
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02675605 0.01528788 0.01611495 0.01597047 0.01593041 0.01592159
|
|
0.01603746 0.01596546 0.0160768 0.01598692]
|
|
|
|
mean value: 0.017004799842834473
|
|
|
|
key: score_time
|
|
value: [0.01488733 0.01176882 0.01256204 0.01223779 0.01227236 0.01227903
|
|
0.01224256 0.01219988 0.01224995 0.01227117]
|
|
|
|
mean value: 0.01249709129333496
|
|
|
|
key: test_mcc
|
|
value: [0.47087096 0.39197153 0.55766794 0.55776344 0.56454844 0.68041382
|
|
0.71254497 0.52643638 0.58205921 0.62604908]
|
|
|
|
mean value: 0.5670325762236224
|
|
|
|
key: train_mcc
|
|
value: [0.60491348 0.61205374 0.61435952 0.60477094 0.58876121 0.59314421
|
|
0.57745207 0.59203553 0.57918623 0.59086772]
|
|
|
|
mean value: 0.5957544643477605
|
|
|
|
key: test_accuracy
|
|
value: [0.73333333 0.68888889 0.75555556 0.77777778 0.77777778 0.83333333
|
|
0.84269663 0.75280899 0.76404494 0.79775281]
|
|
|
|
mean value: 0.7723970037453183
|
|
|
|
key: train_accuracy
|
|
value: [0.79280397 0.79776675 0.79776675 0.79404467 0.7853598 0.78908189
|
|
0.78066914 0.78810409 0.7819083 0.78810409]
|
|
|
|
mean value: 0.7895609447114423
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.7254902 0.7962963 0.78723404 0.79591837 0.84848485
|
|
0.86 0.78 0.80733945 0.82692308]
|
|
|
|
mean value: 0.7977686277224068
|
|
|
|
key: train_fscore
|
|
value: [0.81587652 0.81868743 0.8198895 0.81555556 0.80883978 0.81026786
|
|
0.80398671 0.81021088 0.80400891 0.80893855]
|
|
|
|
mean value: 0.8116261684870456
|
|
|
|
key: test_precision
|
|
value: [0.70588235 0.64912281 0.68253968 0.75510204 0.73584906 0.77777778
|
|
0.76785714 0.69642857 0.6875 0.72881356]
|
|
|
|
mean value: 0.7186872991304029
|
|
|
|
key: train_precision
|
|
value: [0.73412698 0.74193548 0.73904382 0.73843058 0.72908367 0.73630832
|
|
0.72745491 0.73440644 0.72929293 0.73577236]
|
|
|
|
mean value: 0.7345855493436755
|
|
|
|
key: test_recall
|
|
value: [0.8 0.82222222 0.95555556 0.82222222 0.86666667 0.93333333
|
|
0.97727273 0.88636364 0.97777778 0.95555556]
|
|
|
|
mean value: 0.8996969696969697
|
|
|
|
key: train_recall
|
|
value: [0.91811414 0.91315136 0.92059553 0.91066998 0.90818859 0.90074442
|
|
0.89851485 0.90346535 0.89578164 0.89826303]
|
|
|
|
mean value: 0.9067488882883326
|
|
|
|
key: test_roc_auc
|
|
value: [0.73333333 0.68888889 0.75555556 0.77777778 0.77777778 0.83333333
|
|
0.84419192 0.75429293 0.76161616 0.7959596 ]
|
|
|
|
mean value: 0.7722727272727273
|
|
|
|
key: train_roc_auc
|
|
value: [0.79280397 0.79776675 0.79776675 0.79404467 0.7853598 0.78908189
|
|
0.78052293 0.78796096 0.78204923 0.78824042]
|
|
|
|
mean value: 0.7895597376114782
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.56923077 0.66153846 0.64912281 0.66101695 0.73684211
|
|
0.75438596 0.63934426 0.67692308 0.70491803]
|
|
|
|
mean value: 0.66533224291198
|
|
|
|
key: train_jcc
|
|
value: [0.68901304 0.69303202 0.69475655 0.68855535 0.67903525 0.68105066
|
|
0.67222222 0.68097015 0.67225326 0.67917448]
|
|
|
|
mean value: 0.6830062973344871
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.15790009 0.1811409 0.29772925 0.11857009 0.12343264 0.12318158
|
|
0.12044573 0.12444019 0.14886975 0.11971283]
|
|
|
|
mean value: 0.1515423059463501
|
|
|
|
key: score_time
|
|
value: [0.01120138 0.01234007 0.01127076 0.01126385 0.01134348 0.01135635
|
|
0.01138687 0.01135921 0.01133037 0.01167846]
|
|
|
|
mean value: 0.011453080177307128
|
|
|
|
key: test_mcc
|
|
value: [0.8675239 0.95555556 0.89442719 0.95555556 0.95555556 0.86666667
|
|
0.89341253 0.84823428 0.97776667 0.97776667]
|
|
|
|
mean value: 0.9192464579773505
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.93333333 0.97777778 0.94444444 0.97777778 0.97777778 0.93333333
|
|
0.94382022 0.92134831 0.98876404 0.98876404]
|
|
|
|
mean value: 0.9587141073657928
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93478261 0.97777778 0.94736842 0.97777778 0.97777778 0.93333333
|
|
0.94623656 0.92473118 0.98901099 0.98901099]
|
|
|
|
mean value: 0.9597807416372413
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.91489362 0.97777778 0.9 0.97777778 0.97777778 0.93333333
|
|
0.89795918 0.87755102 0.97826087 0.97826087]
|
|
|
|
mean value: 0.941359222690001
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.95555556 0.97777778 1. 0.97777778 0.97777778 0.93333333
|
|
1. 0.97727273 1. 1. ]
|
|
|
|
mean value: 0.9799494949494949
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.93333333 0.97777778 0.94444444 0.97777778 0.97777778 0.93333333
|
|
0.94444444 0.9219697 0.98863636 0.98863636]
|
|
|
|
mean value: 0.9588131313131313
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.87755102 0.95652174 0.9 0.95652174 0.95652174 0.875
|
|
0.89795918 0.86 0.97826087 0.97826087]
|
|
|
|
mean value: 0.9236597160603371
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.85
|
|
|
|
Accuracy on Blind test: 0.98
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.05275273 0.07505655 0.08556962 0.0726521 0.09294224 0.08415771
|
|
0.07687783 0.09366059 0.10185647 0.07463193]
|
|
|
|
mean value: 0.08101577758789062
|
|
|
|
key: score_time
|
|
value: [0.02001882 0.01260519 0.01305866 0.01266956 0.02176833 0.02964878
|
|
0.01262093 0.02493691 0.0202713 0.02356553]
|
|
|
|
mean value: 0.01911640167236328
|
|
|
|
key: test_mcc
|
|
value: [0.82222222 0.78478493 0.80178373 0.89442719 0.87011096 0.91111111
|
|
0.86879834 0.87330789 0.84285003 0.83347626]
|
|
|
|
mean value: 0.8502872654379601
|
|
|
|
key: train_mcc
|
|
value: [0.90164424 0.91157971 0.92868322 0.91121998 0.91378456 0.91913623
|
|
0.9188469 0.9236486 0.9190357 0.91389449]
|
|
|
|
mean value: 0.9161473647041956
|
|
|
|
key: test_accuracy
|
|
value: [0.91111111 0.88888889 0.9 0.94444444 0.93333333 0.95555556
|
|
0.93258427 0.93258427 0.92134831 0.91011236]
|
|
|
|
mean value: 0.9229962546816479
|
|
|
|
key: train_accuracy
|
|
value: [0.95037221 0.95533499 0.96401985 0.95533499 0.95657568 0.95905707
|
|
0.95910781 0.96158612 0.95910781 0.95662949]
|
|
|
|
mean value: 0.9577126015847686
|
|
|
|
key: test_fscore
|
|
value: [0.91111111 0.89583333 0.90322581 0.94736842 0.93617021 0.95555556
|
|
0.93478261 0.93617021 0.92307692 0.91836735]
|
|
|
|
mean value: 0.926166153174751
|
|
|
|
key: train_fscore
|
|
value: [0.95145631 0.95631068 0.96467722 0.95609756 0.95736906 0.96
|
|
0.95990279 0.96224117 0.95990279 0.95736906]
|
|
|
|
mean value: 0.9585326657017645
|
|
|
|
key: test_precision
|
|
value: [0.91111111 0.84313725 0.875 0.9 0.89795918 0.95555556
|
|
0.89583333 0.88 0.91304348 0.8490566 ]
|
|
|
|
mean value: 0.8920696520609884
|
|
|
|
key: train_precision
|
|
value: [0.93111639 0.93586698 0.94736842 0.94004796 0.94019139 0.93838863
|
|
0.94272076 0.94724221 0.94047619 0.94019139]
|
|
|
|
mean value: 0.940361031675133
|
|
|
|
key: test_recall
|
|
value: [0.91111111 0.95555556 0.93333333 1. 0.97777778 0.95555556
|
|
0.97727273 1. 0.93333333 1. ]
|
|
|
|
mean value: 0.9643939393939394
|
|
|
|
key: train_recall
|
|
value: [0.97270471 0.97766749 0.98263027 0.97270471 0.9751861 0.98263027
|
|
0.97772277 0.97772277 0.98014888 0.9751861 ]
|
|
|
|
mean value: 0.97743041053485
|
|
|
|
key: test_roc_auc
|
|
value: [0.91111111 0.88888889 0.9 0.94444444 0.93333333 0.95555556
|
|
0.93308081 0.93333333 0.92121212 0.90909091]
|
|
|
|
mean value: 0.9230050505050506
|
|
|
|
key: train_roc_auc
|
|
value: [0.95037221 0.95533499 0.96401985 0.95533499 0.95657568 0.95905707
|
|
0.95908471 0.9615661 0.95913385 0.95665246]
|
|
|
|
mean value: 0.957713190673906
|
|
|
|
key: test_jcc
|
|
value: [0.83673469 0.81132075 0.82352941 0.9 0.88 0.91489362
|
|
0.87755102 0.88 0.85714286 0.8490566 ]
|
|
|
|
mean value: 0.863022895870512
|
|
|
|
key: train_jcc
|
|
value: [0.90740741 0.91627907 0.93176471 0.91588785 0.9182243 0.92307692
|
|
0.9228972 0.92723005 0.9228972 0.9182243 ]
|
|
|
|
mean value: 0.9203888994203977
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01605797 0.0153873 0.01538062 0.01534128 0.01532412 0.01517677
|
|
0.01525378 0.01524448 0.01526618 0.01519775]
|
|
|
|
mean value: 0.015363025665283202
|
|
|
|
key: score_time
|
|
value: [0.01218605 0.01219034 0.01210594 0.01214838 0.01219964 0.01208615
|
|
0.01204348 0.01202822 0.01199102 0.01202393]
|
|
|
|
mean value: 0.012100315093994141
|
|
|
|
key: test_mcc
|
|
value: [0.28888889 0.11691309 0.43201382 0.44455422 0.27216553 0.40418948
|
|
0.63179558 0.40284686 0.38204659 0.48342527]
|
|
|
|
mean value: 0.38588393426522694
|
|
|
|
key: train_mcc
|
|
value: [0.38259375 0.38836477 0.36959544 0.38202231 0.40293538 0.3955253
|
|
0.37351317 0.39248191 0.37375847 0.37702345]
|
|
|
|
mean value: 0.3837813958239772
|
|
|
|
key: test_accuracy
|
|
value: [0.64444444 0.55555556 0.7 0.72222222 0.63333333 0.68888889
|
|
0.80898876 0.69662921 0.6741573 0.73033708]
|
|
|
|
mean value: 0.6854556803995007
|
|
|
|
key: train_accuracy
|
|
value: [0.68362283 0.68734491 0.67866005 0.68362283 0.69602978 0.69230769
|
|
0.6802974 0.69021066 0.68153656 0.68277571]
|
|
|
|
mean value: 0.6856408411510941
|
|
|
|
key: test_fscore
|
|
value: [0.64444444 0.61538462 0.74766355 0.72527473 0.66666667 0.73584906
|
|
0.82474227 0.72164948 0.73394495 0.76923077]
|
|
|
|
mean value: 0.7184850535712624
|
|
|
|
key: train_fscore
|
|
value: [0.72252448 0.72368421 0.71507151 0.7219193 0.72747497 0.72444444
|
|
0.71772429 0.7246696 0.71476138 0.71681416]
|
|
|
|
mean value: 0.7209088347423753
|
|
|
|
key: test_precision
|
|
value: [0.64444444 0.54237288 0.64516129 0.7173913 0.61111111 0.63934426
|
|
0.75471698 0.66037736 0.625 0.6779661 ]
|
|
|
|
mean value: 0.6517885735194533
|
|
|
|
key: train_precision
|
|
value: [0.64341085 0.64833006 0.64229249 0.64396887 0.65927419 0.65593561
|
|
0.64313725 0.65277778 0.64658635 0.64670659]
|
|
|
|
mean value: 0.6482420045484274
|
|
|
|
key: test_recall
|
|
value: [0.64444444 0.71111111 0.88888889 0.73333333 0.73333333 0.86666667
|
|
0.90909091 0.79545455 0.88888889 0.88888889]
|
|
|
|
mean value: 0.806010101010101
|
|
|
|
key: train_recall
|
|
value: [0.82382134 0.81885856 0.80645161 0.82133995 0.81141439 0.808933
|
|
0.81188119 0.81435644 0.79900744 0.80397022]
|
|
|
|
mean value: 0.8120034149816967
|
|
|
|
key: test_roc_auc
|
|
value: [0.64444444 0.55555556 0.7 0.72222222 0.63333333 0.68888889
|
|
0.81010101 0.69772727 0.67171717 0.72853535]
|
|
|
|
mean value: 0.6852525252525252
|
|
|
|
key: train_roc_auc
|
|
value: [0.68362283 0.68734491 0.67866005 0.68362283 0.69602978 0.69230769
|
|
0.68013414 0.69005663 0.68168194 0.68292571]
|
|
|
|
mean value: 0.6856386507137066
|
|
|
|
key: test_jcc
|
|
value: [0.47540984 0.44444444 0.59701493 0.56896552 0.5 0.58208955
|
|
0.70175439 0.56451613 0.57971014 0.625 ]
|
|
|
|
mean value: 0.5638904935288045
|
|
|
|
key: train_jcc
|
|
value: [0.56558773 0.56701031 0.55650685 0.56484642 0.57167832 0.56794425
|
|
0.55972696 0.56822107 0.55613126 0.55862069]
|
|
|
|
mean value: 0.5636273865485709
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0337944 0.03159857 0.03636742 0.03205371 0.03201866 0.02848721
|
|
0.03344822 0.02441978 0.03174496 0.03047705]
|
|
|
|
mean value: 0.03144099712371826
|
|
|
|
key: score_time
|
|
value: [0.01208568 0.01208949 0.01210642 0.01205373 0.01204181 0.01204824
|
|
0.01209497 0.01204872 0.01208138 0.01203513]
|
|
|
|
mean value: 0.012068557739257812
|
|
|
|
key: test_mcc
|
|
value: [0.88910845 0.84632727 0.65465367 0.75574218 0.84465303 0.8230355
|
|
0.83410221 0.61011921 0.66736889 0.68516318]
|
|
|
|
mean value: 0.7610273578750819
|
|
|
|
key: train_mcc
|
|
value: [0.8841617 0.88674103 0.73159968 0.9058857 0.87662169 0.89170877
|
|
0.81981143 0.70670256 0.7247966 0.73280224]
|
|
|
|
mean value: 0.8160831410117942
|
|
|
|
key: test_accuracy
|
|
value: [0.94444444 0.92222222 0.8 0.87777778 0.92222222 0.91111111
|
|
0.91011236 0.79775281 0.80898876 0.82022472]
|
|
|
|
mean value: 0.8714856429463171
|
|
|
|
key: train_accuracy
|
|
value: [0.94168734 0.94292804 0.84863524 0.9528536 0.93796526 0.94540943
|
|
0.90334572 0.83890954 0.84510533 0.85006196]
|
|
|
|
mean value: 0.9006901460852774
|
|
|
|
key: test_fscore
|
|
value: [0.94382022 0.91954023 0.83333333 0.87912088 0.92307692 0.91304348
|
|
0.91666667 0.76923077 0.8411215 0.8490566 ]
|
|
|
|
mean value: 0.8788010603394287
|
|
|
|
key: train_fscore
|
|
value: [0.94043093 0.94162437 0.86853448 0.95331695 0.93917275 0.94660194
|
|
0.91136364 0.81268012 0.86544672 0.86918919]
|
|
|
|
mean value: 0.90483610756454
|
|
|
|
key: test_precision
|
|
value: [0.95454545 0.95238095 0.71428571 0.86956522 0.91304348 0.89361702
|
|
0.84615385 0.88235294 0.72580645 0.73770492]
|
|
|
|
mean value: 0.8489455995116898
|
|
|
|
key: train_precision
|
|
value: [0.9611399 0.96363636 0.76761905 0.94403893 0.92124105 0.9263658
|
|
0.84243697 0.97241379 0.76425856 0.77011494]
|
|
|
|
mean value: 0.8833265348467835
|
|
|
|
key: test_recall
|
|
value: [0.93333333 0.88888889 1. 0.88888889 0.93333333 0.93333333
|
|
1. 0.68181818 1. 1. ]
|
|
|
|
mean value: 0.925959595959596
|
|
|
|
key: train_recall
|
|
value: [0.92059553 0.92059553 1. 0.96277916 0.95781638 0.96774194
|
|
0.99257426 0.6980198 0.99751861 0.99751861]
|
|
|
|
mean value: 0.9415159816229762
|
|
|
|
key: test_roc_auc
|
|
value: [0.94444444 0.92222222 0.8 0.87777778 0.92222222 0.91111111
|
|
0.91111111 0.79646465 0.80681818 0.81818182]
|
|
|
|
mean value: 0.8710353535353536
|
|
|
|
key: train_roc_auc
|
|
value: [0.94168734 0.94292804 0.84863524 0.9528536 0.93796526 0.94540943
|
|
0.90323502 0.83908434 0.84529396 0.85024445]
|
|
|
|
mean value: 0.9007336682799794
|
|
|
|
key: test_jcc
|
|
value: [0.89361702 0.85106383 0.71428571 0.78431373 0.85714286 0.84
|
|
0.84615385 0.625 0.72580645 0.73770492]
|
|
|
|
mean value: 0.7875088363782133
|
|
|
|
key: train_jcc
|
|
value: [0.88755981 0.88968825 0.76761905 0.91079812 0.8853211 0.89861751
|
|
0.83716075 0.68446602 0.76280835 0.76864245]
|
|
|
|
mean value: 0.829268140768395
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02327752 0.02822757 0.02676415 0.02980375 0.03015065 0.02102804
|
|
0.03098822 0.02771735 0.02987862 0.02319121]
|
|
|
|
mean value: 0.02710270881652832
|
|
|
|
key: score_time
|
|
value: [0.01051235 0.01204324 0.01203513 0.0120542 0.0121026 0.01203537
|
|
0.01206779 0.01216531 0.02876282 0.01466727]
|
|
|
|
mean value: 0.013844609260559082
|
|
|
|
key: test_mcc
|
|
value: [0.82962978 0.80985829 0.59439629 0.76088591 0.84465303 0.84632727
|
|
0.93282828 0.74089667 0.78768651 0.66736889]
|
|
|
|
mean value: 0.7814530916811608
|
|
|
|
key: train_mcc
|
|
value: [0.8394092 0.89097741 0.56556029 0.82178723 0.88589972 0.83773827
|
|
0.90334805 0.77944833 0.86820215 0.7662229 ]
|
|
|
|
mean value: 0.8158593546555849
|
|
|
|
key: test_accuracy
|
|
value: [0.91111111 0.9 0.77777778 0.86666667 0.92222222 0.92222222
|
|
0.96629213 0.85393258 0.88764045 0.80898876]
|
|
|
|
mean value: 0.881685393258427
|
|
|
|
key: train_accuracy
|
|
value: [0.91563275 0.94416873 0.74937965 0.9044665 0.94292804 0.91563275
|
|
0.95167286 0.87856258 0.93184634 0.87112763]
|
|
|
|
mean value: 0.90054178543206
|
|
|
|
key: test_fscore
|
|
value: [0.91666667 0.90721649 0.72972973 0.88235294 0.92134831 0.92473118
|
|
0.96629213 0.87128713 0.89795918 0.8411215 ]
|
|
|
|
mean value: 0.8858705272365572
|
|
|
|
key: train_fscore
|
|
value: [0.92111369 0.94623656 0.67207792 0.91220068 0.94264339 0.92056075
|
|
0.95179234 0.89159292 0.93506494 0.88520971]
|
|
|
|
mean value: 0.8978492898308849
|
|
|
|
key: test_precision
|
|
value: [0.8627451 0.84615385 0.93103448 0.78947368 0.93181818 0.89583333
|
|
0.95555556 0.77192982 0.83018868 0.72580645]
|
|
|
|
mean value: 0.854053913728887
|
|
|
|
key: train_precision
|
|
value: [0.86492375 0.9124424 0.97183099 0.84388186 0.94736842 0.86975717
|
|
0.95061728 0.806 0.89189189 0.7972167 ]
|
|
|
|
mean value: 0.88559304571349
|
|
|
|
key: test_recall
|
|
value: [0.97777778 0.97777778 0.6 1. 0.91111111 0.95555556
|
|
0.97727273 1. 0.97777778 1. ]
|
|
|
|
mean value: 0.9377272727272727
|
|
|
|
key: train_recall
|
|
value: [0.98511166 0.98263027 0.51364764 0.99255583 0.93796526 0.97766749
|
|
0.9529703 0.99752475 0.98263027 0.99503722]
|
|
|
|
mean value: 0.9317740707073189
|
|
|
|
key: test_roc_auc
|
|
value: [0.91111111 0.9 0.77777778 0.86666667 0.92222222 0.92222222
|
|
0.96641414 0.85555556 0.88661616 0.80681818]
|
|
|
|
mean value: 0.881540404040404
|
|
|
|
key: train_roc_auc
|
|
value: [0.91563275 0.94416873 0.74937965 0.9044665 0.94292804 0.91563275
|
|
0.95167125 0.87841498 0.9319092 0.87128099]
|
|
|
|
mean value: 0.9005484853696288
|
|
|
|
key: test_jcc
|
|
value: [0.84615385 0.83018868 0.57446809 0.78947368 0.85416667 0.86
|
|
0.93478261 0.77192982 0.81481481 0.72580645]
|
|
|
|
mean value: 0.8001784661067479
|
|
|
|
key: train_jcc
|
|
value: [0.85376344 0.89795918 0.50611247 0.83857442 0.89150943 0.85281385
|
|
0.90801887 0.80439122 0.87804878 0.79405941]
|
|
|
|
mean value: 0.8225251076145336
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.37636113 0.36746764 0.37343693 0.36896634 0.37034321 0.37079883
|
|
0.36822867 0.36799383 0.3669591 0.36878753]
|
|
|
|
mean value: 0.3699343204498291
|
|
|
|
key: score_time
|
|
value: [0.01578689 0.01575446 0.01571369 0.01567411 0.01578212 0.01596189
|
|
0.01565671 0.01563311 0.01580811 0.01572847]
|
|
|
|
mean value: 0.01574995517730713
|
|
|
|
key: test_mcc
|
|
value: [0.84465303 0.93356387 0.93356387 0.93541435 0.95650071 0.88910845
|
|
0.8660941 0.8660941 0.97776667 0.93465477]
|
|
|
|
mean value: 0.9137413921397143
|
|
|
|
key: train_mcc
|
|
value: [0.99255889 0.98512379 0.98759609 0.98759609 0.98512379 0.99008663
|
|
0.99504336 0.97776986 0.98265484 0.97777055]
|
|
|
|
mean value: 0.9861323908469741
|
|
|
|
key: test_accuracy
|
|
value: [0.92222222 0.96666667 0.96666667 0.96666667 0.97777778 0.94444444
|
|
0.93258427 0.93258427 0.98876404 0.96629213]
|
|
|
|
mean value: 0.9564669163545568
|
|
|
|
key: train_accuracy
|
|
value: [0.99627792 0.99255583 0.99379653 0.99379653 0.99255583 0.99503722
|
|
0.99752169 0.98884758 0.9913259 0.98884758]
|
|
|
|
mean value: 0.9930562602045994
|
|
|
|
key: test_fscore
|
|
value: [0.92134831 0.96629213 0.96703297 0.96774194 0.97727273 0.94382022
|
|
0.93333333 0.93333333 0.98901099 0.96774194]
|
|
|
|
mean value: 0.9566927895108396
|
|
|
|
key: train_fscore
|
|
value: [0.99628253 0.99257426 0.99380421 0.99380421 0.99257426 0.9950495
|
|
0.99752475 0.98892989 0.9913259 0.98890259]
|
|
|
|
mean value: 0.9930772103512201
|
|
|
|
key: test_precision
|
|
value: [0.93181818 0.97727273 0.95652174 0.9375 1. 0.95454545
|
|
0.91304348 0.91304348 0.97826087 0.9375 ]
|
|
|
|
mean value: 0.9499505928853755
|
|
|
|
key: train_precision
|
|
value: [0.9950495 0.99012346 0.99257426 0.99257426 0.99012346 0.99259259
|
|
0.99752475 0.98288509 0.99009901 0.98284314]
|
|
|
|
mean value: 0.9906389511180531
|
|
|
|
key: test_recall
|
|
value: [0.91111111 0.95555556 0.97777778 1. 0.95555556 0.93333333
|
|
0.95454545 0.95454545 1. 1. ]
|
|
|
|
mean value: 0.9642424242424242
|
|
|
|
key: train_recall
|
|
value: [0.99751861 0.99503722 0.99503722 0.99503722 0.99503722 0.99751861
|
|
0.99752475 0.9950495 0.99255583 0.99503722]
|
|
|
|
mean value: 0.9955353413753286
|
|
|
|
key: test_roc_auc
|
|
value: [0.92222222 0.96666667 0.96666667 0.96666667 0.97777778 0.94444444
|
|
0.93282828 0.93282828 0.98863636 0.96590909]
|
|
|
|
mean value: 0.9564646464646465
|
|
|
|
key: train_roc_auc
|
|
value: [0.99627792 0.99255583 0.99379653 0.99379653 0.99255583 0.99503722
|
|
0.99752168 0.98883989 0.99132742 0.98885524]
|
|
|
|
mean value: 0.9930564086185293
|
|
|
|
key: test_jcc
|
|
value: [0.85416667 0.93478261 0.93617021 0.9375 0.95555556 0.89361702
|
|
0.875 0.875 0.97826087 0.9375 ]
|
|
|
|
mean value: 0.9177552934525645
|
|
|
|
key: train_jcc
|
|
value: [0.99259259 0.98525799 0.98768473 0.98768473 0.98525799 0.99014778
|
|
0.99506173 0.97810219 0.98280098 0.97804878]
|
|
|
|
mean value: 0.9862639485952744
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.2280879 0.23964858 0.24391055 0.13132954 0.24404192 0.24290991
|
|
0.23835158 0.25424933 0.14319587 0.2404716 ]
|
|
|
|
mean value: 0.22061967849731445
|
|
|
|
key: score_time
|
|
value: [0.03775358 0.0385406 0.03522134 0.02761555 0.04224634 0.0341239
|
|
0.03649116 0.03590512 0.02610898 0.03781915]
|
|
|
|
mean value: 0.03518257141113281
|
|
|
|
key: test_mcc
|
|
value: [0.91201231 0.88910845 0.89087081 0.8675239 0.88910845 0.86666667
|
|
0.89341253 0.86515152 0.97777778 1. ]
|
|
|
|
mean value: 0.9051632402749425
|
|
|
|
key: train_mcc
|
|
value: [0.99752168 0.99008663 1. 0.99255889 0.99012321 0.99503722
|
|
0.99752474 0.99256807 0.99256807 0.99008673]
|
|
|
|
mean value: 0.993807524086899
|
|
|
|
key: test_accuracy
|
|
value: [0.95555556 0.94444444 0.94444444 0.93333333 0.94444444 0.93333333
|
|
0.94382022 0.93258427 0.98876404 1. ]
|
|
|
|
mean value: 0.9520724094881399
|
|
|
|
key: train_accuracy
|
|
value: [0.99875931 0.99503722 1. 0.99627792 0.99503722 0.99751861
|
|
0.99876084 0.99628253 0.99628253 0.99504337]
|
|
|
|
mean value: 0.9968999541850003
|
|
|
|
key: test_fscore
|
|
value: [0.95652174 0.94382022 0.94623656 0.93478261 0.94505495 0.93333333
|
|
0.94623656 0.93181818 0.98876404 1. ]
|
|
|
|
mean value: 0.9526568195975038
|
|
|
|
key: train_fscore
|
|
value: [0.99875776 0.9950495 1. 0.99627329 0.99501247 0.99751861
|
|
0.99876391 0.99629172 0.99627329 0.99503722]
|
|
|
|
mean value: 0.9968977777097462
|
|
|
|
key: test_precision
|
|
value: [0.93617021 0.95454545 0.91666667 0.91489362 0.93478261 0.93333333
|
|
0.89795918 0.93181818 1. 1. ]
|
|
|
|
mean value: 0.9420169258519993
|
|
|
|
key: train_precision
|
|
value: [1. 0.99259259 1. 0.99751244 1. 0.99751861
|
|
0.99753086 0.99506173 0.99751244 0.99503722]
|
|
|
|
mean value: 0.9972765892072584
|
|
|
|
key: test_recall
|
|
value: [0.97777778 0.93333333 0.97777778 0.95555556 0.95555556 0.93333333
|
|
1. 0.93181818 0.97777778 1. ]
|
|
|
|
mean value: 0.9642929292929293
|
|
|
|
key: train_recall
|
|
value: [0.99751861 0.99751861 1. 0.99503722 0.99007444 0.99751861
|
|
1. 0.99752475 0.99503722 0.99503722]
|
|
|
|
mean value: 0.9965266687959118
|
|
|
|
key: test_roc_auc
|
|
value: [0.95555556 0.94444444 0.94444444 0.93333333 0.94444444 0.93333333
|
|
0.94444444 0.93257576 0.98888889 1. ]
|
|
|
|
mean value: 0.9521464646464647
|
|
|
|
key: train_roc_auc
|
|
value: [0.99875931 0.99503722 1. 0.99627792 0.99503722 0.99751861
|
|
0.99875931 0.99628099 0.99628099 0.99504336]
|
|
|
|
mean value: 0.9968994914379775
|
|
|
|
key: test_jcc
|
|
value: [0.91666667 0.89361702 0.89795918 0.87755102 0.89583333 0.875
|
|
0.89795918 0.87234043 0.97777778 1. ]
|
|
|
|
mean value: 0.910470461234139
|
|
|
|
key: train_jcc
|
|
value: [0.99751861 0.99014778 1. 0.99257426 0.99007444 0.9950495
|
|
0.99753086 0.99261084 0.99257426 0.99012346]
|
|
|
|
mean value: 0.9938204013588471
|
|
|
|
MCC on Blind test: 0.54
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.4049921 0.41999841 0.33586693 0.48077893 0.4214468 0.37379622
|
|
0.44452333 0.3556335 0.45986605 0.47766662]
|
|
|
|
mean value: 0.4174568891525269
|
|
|
|
key: score_time
|
|
value: [0.02312779 0.01993775 0.02042317 0.01991701 0.02000713 0.02371883
|
|
0.03467965 0.03444147 0.01985121 0.03443623]
|
|
|
|
mean value: 0.025054025650024413
|
|
|
|
key: test_mcc
|
|
value: [0.75574218 0.53780023 0.82548988 0.78478493 0.69509522 0.76486616
|
|
0.85354573 0.77614967 0.7951298 0.74189677]
|
|
|
|
mean value: 0.7530500570612386
|
|
|
|
key: train_mcc
|
|
value: [0.97270771 0.96278212 0.96278212 0.96030959 0.9653081 0.96776875
|
|
0.96535055 0.96785568 0.96035894 0.97276563]
|
|
|
|
mean value: 0.9657989194695036
|
|
|
|
key: test_accuracy
|
|
value: [0.87777778 0.75555556 0.91111111 0.88888889 0.84444444 0.87777778
|
|
0.92134831 0.88764045 0.88764045 0.86516854]
|
|
|
|
mean value: 0.8717353308364544
|
|
|
|
key: train_accuracy
|
|
value: [0.98635236 0.98138958 0.98138958 0.98014888 0.98263027 0.98387097
|
|
0.9826518 0.98389095 0.98017348 0.98636927]
|
|
|
|
mean value: 0.9828867139575858
|
|
|
|
key: test_fscore
|
|
value: [0.87640449 0.78846154 0.91489362 0.89583333 0.85416667 0.88659794
|
|
0.92631579 0.88888889 0.9 0.87755102]
|
|
|
|
mean value: 0.8809113286779904
|
|
|
|
key: train_fscore
|
|
value: [0.98636927 0.98141264 0.98141264 0.98019802 0.98271605 0.98393078
|
|
0.98275862 0.98400984 0.98019802 0.98640297]
|
|
|
|
mean value: 0.9829408842846938
|
|
|
|
key: test_precision
|
|
value: [0.88636364 0.69491525 0.87755102 0.84313725 0.80392157 0.82692308
|
|
0.8627451 0.86956522 0.81818182 0.81132075]
|
|
|
|
mean value: 0.8294624699790896
|
|
|
|
key: train_precision
|
|
value: [0.98514851 0.98019802 0.98019802 0.97777778 0.97788698 0.98029557
|
|
0.97794118 0.97799511 0.97777778 0.98275862]
|
|
|
|
mean value: 0.9797977561585135
|
|
|
|
key: test_recall
|
|
value: [0.86666667 0.91111111 0.95555556 0.95555556 0.91111111 0.95555556
|
|
1. 0.90909091 1. 0.95555556]
|
|
|
|
mean value: 0.942020202020202
|
|
|
|
key: train_recall
|
|
value: [0.98759305 0.98263027 0.98263027 0.98263027 0.98759305 0.98759305
|
|
0.98762376 0.99009901 0.98263027 0.99007444]
|
|
|
|
mean value: 0.986109746210353
|
|
|
|
key: test_roc_auc
|
|
value: [0.87777778 0.75555556 0.91111111 0.88888889 0.84444444 0.87777778
|
|
0.92222222 0.88787879 0.88636364 0.86414141]
|
|
|
|
mean value: 0.8716161616161616
|
|
|
|
key: train_roc_auc
|
|
value: [0.98635236 0.98138958 0.98138958 0.98014888 0.98263027 0.98387097
|
|
0.98264563 0.98388325 0.98017652 0.98637385]
|
|
|
|
mean value: 0.982886089477434
|
|
|
|
key: test_jcc
|
|
value: [0.78 0.65079365 0.84313725 0.81132075 0.74545455 0.7962963
|
|
0.8627451 0.8 0.81818182 0.78181818]
|
|
|
|
mean value: 0.788974760020265
|
|
|
|
key: train_jcc
|
|
value: [0.97310513 0.96350365 0.96350365 0.96116505 0.96601942 0.96836983
|
|
0.96610169 0.968523 0.96116505 0.97317073]
|
|
|
|
mean value: 0.9664627207035085
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.64468813 1.637321 1.61321831 1.61550736 1.60727763 1.61166739
|
|
1.61358714 1.62627339 1.61646795 1.62588835]
|
|
|
|
mean value: 1.6211896657943725
|
|
|
|
key: score_time
|
|
value: [0.0095973 0.00957012 0.00943494 0.00956225 0.00962305 0.00950909
|
|
0.00976372 0.0096252 0.00968647 0.00968337]
|
|
|
|
mean value: 0.00960555076599121
|
|
|
|
key: test_mcc
|
|
value: [0.84465303 0.91201231 0.91473203 0.91473203 0.93356387 0.8675239
|
|
0.85354573 0.8660941 1. 0.91097728]
|
|
|
|
mean value: 0.90178342770123
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92222222 0.95555556 0.95555556 0.95555556 0.96666667 0.93333333
|
|
0.92134831 0.93258427 1. 0.95505618]
|
|
|
|
mean value: 0.9497877652933833
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.92307692 0.95454545 0.95744681 0.95744681 0.96703297 0.93478261
|
|
0.92631579 0.93333333 1. 0.95652174]
|
|
|
|
mean value: 0.9510502432309725
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.91304348 0.97674419 0.91836735 0.91836735 0.95652174 0.91489362
|
|
0.8627451 0.91304348 1. 0.93617021]
|
|
|
|
mean value: 0.9309896503402686
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.93333333 0.93333333 1. 1. 0.97777778 0.95555556
|
|
1. 0.95454545 1. 0.97777778]
|
|
|
|
mean value: 0.9732323232323232
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92222222 0.95555556 0.95555556 0.95555556 0.96666667 0.93333333
|
|
0.92222222 0.93282828 1. 0.95479798]
|
|
|
|
mean value: 0.9498737373737374
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.85714286 0.91304348 0.91836735 0.91836735 0.93617021 0.87755102
|
|
0.8627451 0.875 1. 0.91666667]
|
|
|
|
mean value: 0.9075054027161281
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03815174 0.03756595 0.03726172 0.03956819 0.03944492 0.05299902
|
|
0.03707671 0.03902411 0.0396955 0.03844237]
|
|
|
|
mean value: 0.03992302417755127
|
|
|
|
key: score_time
|
|
value: [0.01228261 0.01283383 0.01281667 0.01279283 0.01276374 0.01285672
|
|
0.01310396 0.0128181 0.01273799 0.01292562]
|
|
|
|
mean value: 0.012793207168579101
|
|
|
|
key: test_mcc
|
|
value: [1. 0.95650071 1. 1. 1. 0.97801929
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.993452000843893
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.97777778 1. 1. 1. 0.98888889
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9966666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.97826087 1. 1. 1. 0.98901099
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9967271858576207
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.95744681 1. 1. 1. 0.97826087
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9935707678075856
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.97777778 1. 1. 1. 0.98888889
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9966666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.95744681 1. 1. 1. 0.97826087
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9935707678075856
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03165293 0.05508423 0.04804564 0.04195166 0.04214764 0.04945612
|
|
0.05005693 0.04212046 0.04247046 0.05200267]
|
|
|
|
mean value: 0.04549887180328369
|
|
|
|
key: score_time
|
|
value: [0.01954985 0.01971459 0.01955962 0.01918316 0.0191257 0.01914597
|
|
0.01898503 0.01911139 0.01913929 0.01903248]
|
|
|
|
mean value: 0.0192547082901001
|
|
|
|
key: test_mcc
|
|
value: [0.80498447 0.80985829 0.82548988 0.84632727 0.87011096 0.8675239
|
|
0.89341253 0.85354573 0.88956845 0.83347626]
|
|
|
|
mean value: 0.8494297728818418
|
|
|
|
key: train_mcc
|
|
value: [0.88800201 0.89033793 0.89005172 0.88334426 0.89005172 0.89064647
|
|
0.88783361 0.88783361 0.87821137 0.89281783]
|
|
|
|
mean value: 0.8879130526344357
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.9 0.91111111 0.92222222 0.93333333 0.93333333
|
|
0.94382022 0.92134831 0.94382022 0.91011236]
|
|
|
|
mean value: 0.9219101123595506
|
|
|
|
key: train_accuracy
|
|
value: [0.94292804 0.94416873 0.94416873 0.94044665 0.94416873 0.94416873
|
|
0.94299876 0.94299876 0.93804213 0.94547708]
|
|
|
|
mean value: 0.9429566356416098
|
|
|
|
key: test_fscore
|
|
value: [0.90526316 0.90721649 0.91489362 0.92473118 0.93617021 0.93478261
|
|
0.94623656 0.92631579 0.94623656 0.91836735]
|
|
|
|
mean value: 0.9260213528710712
|
|
|
|
key: train_fscore
|
|
value: [0.94484412 0.94597839 0.94584838 0.94258373 0.94584838 0.94610778
|
|
0.94484412 0.94484412 0.94004796 0.94711538]
|
|
|
|
mean value: 0.9448062379094423
|
|
|
|
key: test_precision
|
|
value: [0.86 0.84615385 0.87755102 0.89583333 0.89795918 0.91489362
|
|
0.89795918 0.8627451 0.91666667 0.8490566 ]
|
|
|
|
mean value: 0.8818818552743025
|
|
|
|
key: train_precision
|
|
value: [0.91415313 0.91627907 0.9182243 0.90993072 0.9182243 0.91435185
|
|
0.91627907 0.91627907 0.90951276 0.91841492]
|
|
|
|
mean value: 0.9151649186906734
|
|
|
|
key: test_recall
|
|
value: [0.95555556 0.97777778 0.95555556 0.95555556 0.97777778 0.95555556
|
|
1. 1. 0.97777778 1. ]
|
|
|
|
mean value: 0.9755555555555555
|
|
|
|
key: train_recall
|
|
value: [0.97766749 0.97766749 0.9751861 0.97766749 0.9751861 0.98014888
|
|
0.97524752 0.97524752 0.97270471 0.97766749]
|
|
|
|
mean value: 0.9764390831142667
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.9 0.91111111 0.92222222 0.93333333 0.93333333
|
|
0.94444444 0.92222222 0.94343434 0.90909091]
|
|
|
|
mean value: 0.9219191919191919
|
|
|
|
key: train_roc_auc
|
|
value: [0.94292804 0.94416873 0.94416873 0.94044665 0.94416873 0.94416873
|
|
0.94295875 0.94295875 0.93808503 0.94551692]
|
|
|
|
mean value: 0.9429569073532664
|
|
|
|
key: test_jcc
|
|
value: [0.82692308 0.83018868 0.84313725 0.86 0.88 0.87755102
|
|
0.89795918 0.8627451 0.89795918 0.8490566 ]
|
|
|
|
mean value: 0.8625520100638223
|
|
|
|
key: train_jcc
|
|
value: [0.89545455 0.89749431 0.89726027 0.89140271 0.89726027 0.89772727
|
|
0.89545455 0.89545455 0.88687783 0.89954338]
|
|
|
|
mean value: 0.8953929684257154
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.45315671 0.46074963 0.54650807 0.52523971 0.45425153 0.48789287
|
|
0.32560182 0.4065733 0.33133173 0.32627273]
|
|
|
|
mean value: 0.4317578077316284
|
|
|
|
key: score_time
|
|
value: [0.01915765 0.01916885 0.01964307 0.02054811 0.01923037 0.01914525
|
|
0.01911402 0.0124743 0.01924253 0.01922941]
|
|
|
|
mean value: 0.018695354461669922
|
|
|
|
key: test_mcc
|
|
value: [0.80498447 0.84632727 0.80498447 0.84632727 0.87011096 0.88910845
|
|
0.89341253 0.85354573 0.91010101 0.83347626]
|
|
|
|
mean value: 0.8552378404836852
|
|
|
|
key: train_mcc
|
|
value: [0.88800201 0.9117935 0.92868322 0.88334426 0.9117935 0.91700016
|
|
0.91168329 0.91406603 0.91190676 0.91665426]
|
|
|
|
mean value: 0.9094926993884517
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.92222222 0.9 0.92222222 0.93333333 0.94444444
|
|
0.94382022 0.92134831 0.95505618 0.91011236]
|
|
|
|
mean value: 0.9252559300873908
|
|
|
|
key: train_accuracy
|
|
value: [0.94292804 0.95533499 0.96401985 0.94044665 0.95533499 0.95781638
|
|
0.95539033 0.95662949 0.95539033 0.95786865]
|
|
|
|
mean value: 0.954115970370917
|
|
|
|
key: test_fscore
|
|
value: [0.90526316 0.92473118 0.90526316 0.92473118 0.93617021 0.94505495
|
|
0.94623656 0.92631579 0.95555556 0.91836735]
|
|
|
|
mean value: 0.9287689090309574
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./gid_cd_sl.py:136: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_cd_sl.py:139: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.94484412 0.95641646 0.96467722 0.94258373 0.95641646 0.9589372
|
|
0.95641646 0.95757576 0.95641646 0.95873786]
|
|
|
|
mean value: 0.9553021758941784
|
|
|
|
key: test_precision
|
|
value: [0.86 0.89583333 0.86 0.89583333 0.89795918 0.93478261
|
|
0.89795918 0.8627451 0.95555556 0.8490566 ]
|
|
|
|
mean value: 0.8909724900077614
|
|
|
|
key: train_precision
|
|
value: [0.91415313 0.93380615 0.94736842 0.90993072 0.93380615 0.93411765
|
|
0.93601896 0.93824228 0.93380615 0.93824228]
|
|
|
|
mean value: 0.9319491873929725
|
|
|
|
key: test_recall
|
|
value: [0.95555556 0.95555556 0.95555556 0.95555556 0.97777778 0.95555556
|
|
1. 1. 0.95555556 1. ]
|
|
|
|
mean value: 0.9711111111111111
|
|
|
|
key: train_recall
|
|
value: [0.97766749 0.98014888 0.98263027 0.97766749 0.98014888 0.98511166
|
|
0.97772277 0.97772277 0.98014888 0.98014888]
|
|
|
|
mean value: 0.9799118001130138
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.92222222 0.9 0.92222222 0.93333333 0.94444444
|
|
0.94444444 0.92222222 0.95505051 0.90909091]
|
|
|
|
mean value: 0.9253030303030304
|
|
|
|
key: train_roc_auc
|
|
value: [0.94292804 0.95533499 0.96401985 0.94044665 0.95533499 0.95781638
|
|
0.95536263 0.95660332 0.95542098 0.95789622]
|
|
|
|
mean value: 0.9541164041962509
|
|
|
|
key: test_jcc
|
|
value: [0.82692308 0.86 0.82692308 0.86 0.88 0.89583333
|
|
0.89795918 0.8627451 0.91489362 0.8490566 ]
|
|
|
|
mean value: 0.8674333989687034
|
|
|
|
key: train_jcc
|
|
value: [0.89545455 0.91647332 0.93176471 0.89140271 0.91647332 0.92111369
|
|
0.91647332 0.91860465 0.91647332 0.92074592]
|
|
|
|
mean value: 0.9144979498734581
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03689241 0.04265761 0.04580879 0.04016733 0.04726529 0.0484302
|
|
0.04094744 0.04846334 0.05894446 0.04132891]
|
|
|
|
mean value: 0.04509057998657227
|
|
|
|
key: score_time
|
|
value: [0.01441455 0.01293039 0.01289701 0.01875615 0.01302338 0.01307535
|
|
0.01482558 0.0131278 0.01342797 0.01478982]
|
|
|
|
mean value: 0.014126801490783691
|
|
|
|
key: test_mcc
|
|
value: [0.66683134 0.79772404 0.80498447 0.82962978 0.87011096 0.80498447
|
|
0.9347507 0.82801395 0.82020202 0.68782612]
|
|
|
|
mean value: 0.8045057838862064
|
|
|
|
key: train_mcc
|
|
value: [0.84617966 0.84850814 0.8353558 0.82802779 0.82802779 0.84007895
|
|
0.82325796 0.82353126 0.82682649 0.86965301]
|
|
|
|
mean value: 0.8369446857815686
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.88888889 0.9 0.91111111 0.93333333 0.9
|
|
0.96629213 0.91011236 0.91011236 0.84269663]
|
|
|
|
mean value: 0.8995880149812734
|
|
|
|
key: train_accuracy
|
|
value: [0.92183623 0.92307692 0.91687345 0.91315136 0.91315136 0.91935484
|
|
0.91078067 0.91078067 0.91201983 0.9330855 ]
|
|
|
|
mean value: 0.9174110835401158
|
|
|
|
key: test_fscore
|
|
value: [0.83516484 0.9 0.90526316 0.91666667 0.93617021 0.90526316
|
|
0.96703297 0.91489362 0.91111111 0.85106383]
|
|
|
|
mean value: 0.9042629555339522
|
|
|
|
key: train_fscore
|
|
value: [0.92473118 0.92583732 0.91937425 0.91586538 0.91586538 0.92159228
|
|
0.91366906 0.9138756 0.91537545 0.93586698]
|
|
|
|
mean value: 0.9202052893517897
|
|
|
|
key: test_precision
|
|
value: [0.82608696 0.81818182 0.86 0.8627451 0.89795918 0.86
|
|
0.93617021 0.86 0.91111111 0.81632653]
|
|
|
|
mean value: 0.8648580910905556
|
|
|
|
key: train_precision
|
|
value: [0.89170507 0.89376443 0.89252336 0.88811189 0.88811189 0.89671362
|
|
0.88604651 0.88425926 0.88073394 0.89749431]
|
|
|
|
mean value: 0.8899464280118269
|
|
|
|
key: test_recall
|
|
value: [0.84444444 1. 0.95555556 0.97777778 0.97777778 0.95555556
|
|
1. 0.97727273 0.91111111 0.88888889]
|
|
|
|
mean value: 0.9488383838383838
|
|
|
|
key: train_recall
|
|
value: [0.96029777 0.96029777 0.94789082 0.94540943 0.94540943 0.94789082
|
|
0.94306931 0.94554455 0.9528536 0.97766749]
|
|
|
|
mean value: 0.9526330982974228
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.88888889 0.9 0.91111111 0.93333333 0.9
|
|
0.96666667 0.91085859 0.91010101 0.84217172]
|
|
|
|
mean value: 0.8996464646464647
|
|
|
|
key: train_roc_auc
|
|
value: [0.92183623 0.92307692 0.91687345 0.91315136 0.91315136 0.91935484
|
|
0.91074061 0.91073754 0.91207036 0.93314068]
|
|
|
|
mean value: 0.9174133356263666
|
|
|
|
key: test_jcc
|
|
value: [0.71698113 0.81818182 0.82692308 0.84615385 0.88 0.82692308
|
|
0.93617021 0.84313725 0.83673469 0.74074074]
|
|
|
|
mean value: 0.82719458525435
|
|
|
|
key: train_jcc
|
|
value: [0.86 0.86191537 0.85077951 0.84478936 0.84478936 0.85458613
|
|
0.8410596 0.84140969 0.84395604 0.87946429]
|
|
|
|
mean value: 0.8522749345177734
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.03852129 0.92209411 1.05256343 0.92312527 1.02993727 0.97746301
|
|
1.06837082 0.93035865 0.94438505 1.13742685]
|
|
|
|
mean value: 1.0024245738983155
|
|
|
|
key: score_time
|
|
value: [0.01473808 0.01500893 0.01502061 0.01503897 0.01506519 0.0150485
|
|
0.01552367 0.01467824 0.01468825 0.01467204]
|
|
|
|
mean value: 0.014948248863220215
|
|
|
|
key: test_mcc
|
|
value: [0.88910845 0.85485041 0.91473203 0.91473203 0.91473203 0.87447463
|
|
0.95603853 0.87330789 0.85305908 0.91371736]
|
|
|
|
mean value: 0.8958752443730051
|
|
|
|
key: train_mcc
|
|
value: [0.95066493 0.96824584 0.97791139 0.96824584 0.96824584 0.97065374
|
|
0.96588366 0.96348955 0.97310106 0.9706906 ]
|
|
|
|
mean value: 0.967713243914834
|
|
|
|
key: test_accuracy
|
|
value: [0.94444444 0.92222222 0.95555556 0.95555556 0.95555556 0.93333333
|
|
0.97752809 0.93258427 0.92134831 0.95505618]
|
|
|
|
mean value: 0.9453183520599251
|
|
|
|
key: train_accuracy
|
|
value: [0.9751861 0.98387097 0.98883375 0.98387097 0.98387097 0.98511166
|
|
0.9826518 0.98141264 0.98636927 0.98513011]
|
|
|
|
mean value: 0.9836308233478158
|
|
|
|
key: test_fscore
|
|
value: [0.94505495 0.92783505 0.95744681 0.95744681 0.95744681 0.9375
|
|
0.97777778 0.93617021 0.92783505 0.95744681]
|
|
|
|
mean value: 0.9481960272734017
|
|
|
|
key: train_fscore
|
|
value: [0.9754902 0.98412698 0.98895706 0.98412698 0.98412698 0.98533007
|
|
0.98296837 0.981774 0.98653611 0.98533007]
|
|
|
|
mean value: 0.9838766825484062
|
|
|
|
key: test_precision
|
|
value: [0.93478261 0.86538462 0.91836735 0.91836735 0.91836735 0.88235294
|
|
0.95652174 0.88 0.86538462 0.91836735]
|
|
|
|
mean value: 0.9057895907526891
|
|
|
|
key: train_precision
|
|
value: [0.96368039 0.96875 0.97815534 0.96875 0.96875 0.97108434
|
|
0.96650718 0.96420048 0.97342995 0.97108434]
|
|
|
|
mean value: 0.9694392007965105
|
|
|
|
key: test_recall
|
|
value: [0.95555556 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9955555555555555
|
|
|
|
key: train_recall
|
|
value: [0.98759305 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9987593052109182
|
|
|
|
key: test_roc_auc
|
|
value: [0.94444444 0.92222222 0.95555556 0.95555556 0.95555556 0.93333333
|
|
0.97777778 0.93333333 0.92045455 0.95454545]
|
|
|
|
mean value: 0.9452777777777778
|
|
|
|
key: train_roc_auc
|
|
value: [0.9751861 0.98387097 0.98883375 0.98387097 0.98387097 0.98511166
|
|
0.98263027 0.98138958 0.98638614 0.98514851]
|
|
|
|
mean value: 0.9836298921455421
|
|
|
|
key: test_jcc
|
|
value: [0.89583333 0.86538462 0.91836735 0.91836735 0.91836735 0.88235294
|
|
0.95652174 0.88 0.86538462 0.91836735]
|
|
|
|
mean value: 0.9018946632164572
|
|
|
|
key: train_jcc
|
|
value: [0.95215311 0.96875 0.97815534 0.96875 0.96875 0.97108434
|
|
0.96650718 0.96420048 0.97342995 0.97108434]
|
|
|
|
mean value: 0.9682864730603751
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01632881 0.01156211 0.01145387 0.01119852 0.01099634 0.01095724
|
|
0.01095772 0.01161122 0.01106167 0.01103663]
|
|
|
|
mean value: 0.011716413497924804
|
|
|
|
key: score_time
|
|
value: [0.01445365 0.00946689 0.00935221 0.00918722 0.00900865 0.00896406
|
|
0.00893807 0.00899172 0.00905561 0.0089941 ]
|
|
|
|
mean value: 0.009641218185424804
|
|
|
|
key: test_mcc
|
|
value: [0.21821789 0.24503975 0.05170877 0.27287642 0.24514517 0.15512631
|
|
0.46032248 0.19046035 0.25265357 0.33204343]
|
|
|
|
mean value: 0.242359413233725
|
|
|
|
key: train_mcc
|
|
value: [0.26004892 0.25453873 0.26811672 0.24874244 0.25439568 0.26409496
|
|
0.23517337 0.26782175 0.25114742 0.25189938]
|
|
|
|
mean value: 0.25559793714453005
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.6 0.52222222 0.62222222 0.61111111 0.56666667
|
|
0.68539326 0.58426966 0.60674157 0.65168539]
|
|
|
|
mean value: 0.6050312109862672
|
|
|
|
key: train_accuracy
|
|
value: [0.61414392 0.61290323 0.617866 0.60918114 0.61166253 0.61662531
|
|
0.6047088 0.61833953 0.61090458 0.61090458]
|
|
|
|
mean value: 0.6127239630897143
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.68965517 0.61946903 0.69090909 0.67889908 0.65486726
|
|
0.75438596 0.65420561 0.69565217 0.71559633]
|
|
|
|
mean value: 0.6820306372321387
|
|
|
|
key: train_fscore
|
|
value: [0.68868869 0.68548387 0.69138277 0.68468468 0.68668669 0.68944724
|
|
0.67875126 0.69138277 0.68410463 0.68473896]
|
|
|
|
mean value: 0.6865351540672405
|
|
|
|
key: test_precision
|
|
value: [0.57142857 0.56338028 0.51470588 0.58461538 0.578125 0.54411765
|
|
0.61428571 0.55555556 0.57142857 0.609375 ]
|
|
|
|
mean value: 0.5707017608415703
|
|
|
|
key: train_precision
|
|
value: [0.57718121 0.57724958 0.57983193 0.5738255 0.57550336 0.57939189
|
|
0.5721562 0.58080808 0.57529611 0.57504216]
|
|
|
|
mean value: 0.5766286011889984
|
|
|
|
key: test_recall
|
|
value: [0.8 0.88888889 0.77777778 0.84444444 0.82222222 0.82222222
|
|
0.97727273 0.79545455 0.88888889 0.86666667]
|
|
|
|
mean value: 0.8483838383838384
|
|
|
|
key: train_recall
|
|
value: [0.85359801 0.84367246 0.8560794 0.84863524 0.85111663 0.85111663
|
|
0.83415842 0.8539604 0.84367246 0.84615385]
|
|
|
|
mean value: 0.8482163476893595
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 0.6 0.52222222 0.62222222 0.61111111 0.56666667
|
|
0.68863636 0.58661616 0.60353535 0.64924242]
|
|
|
|
mean value: 0.6050252525252525
|
|
|
|
key: train_roc_auc
|
|
value: [0.61414392 0.61290323 0.617866 0.60918114 0.61166253 0.61662531
|
|
0.60442412 0.6180472 0.61119266 0.61119573]
|
|
|
|
mean value: 0.6127241849495123
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.52631579 0.44871795 0.52777778 0.51388889 0.48684211
|
|
0.6056338 0.48611111 0.53333333 0.55714286]
|
|
|
|
mean value: 0.518576361452566
|
|
|
|
key: train_jcc
|
|
value: [0.52519084 0.52147239 0.52833078 0.52054795 0.52286585 0.52607362
|
|
0.51371951 0.52833078 0.51987768 0.52061069]
|
|
|
|
mean value: 0.5227020087909052
|
|
|
|
MCC on Blind test: -0.01
|
|
|
|
Accuracy on Blind test: 0.49
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01939964 0.01594996 0.0159452 0.0158875 0.01586366 0.02144837
|
|
0.01713848 0.01604915 0.01601195 0.02883649]
|
|
|
|
mean value: 0.018253040313720704
|
|
|
|
key: score_time
|
|
value: [0.01221561 0.01233673 0.01222086 0.01223516 0.01217341 0.01263738
|
|
0.01248431 0.01226759 0.01244903 0.01386905]
|
|
|
|
mean value: 0.012488913536071778
|
|
|
|
key: test_mcc
|
|
value: [0.3666794 0.4412613 0.40418948 0.54433105 0.56568542 0.39197153
|
|
0.66601782 0.51490953 0.36743215 0.523574 ]
|
|
|
|
mean value: 0.47860516912740225
|
|
|
|
key: train_mcc
|
|
value: [0.51508036 0.51769575 0.51482412 0.49474344 0.5103364 0.50494434
|
|
0.50120602 0.52042203 0.49163928 0.48592392]
|
|
|
|
mean value: 0.5056815658919893
|
|
|
|
key: test_accuracy
|
|
value: [0.67777778 0.7 0.68888889 0.76666667 0.76666667 0.68888889
|
|
0.82022472 0.75280899 0.6741573 0.75280899]
|
|
|
|
mean value: 0.7288888888888889
|
|
|
|
key: train_accuracy
|
|
value: [0.74565757 0.74937965 0.74689826 0.74069479 0.74565757 0.74565757
|
|
0.74101611 0.74969021 0.73853779 0.73605948]
|
|
|
|
mean value: 0.7439249002985662
|
|
|
|
key: test_fscore
|
|
value: [0.71287129 0.75229358 0.73584906 0.78787879 0.8 0.7254902
|
|
0.84 0.77083333 0.72380952 0.78431373]
|
|
|
|
mean value: 0.763333948830441
|
|
|
|
key: train_fscore
|
|
value: [0.77885653 0.77899344 0.77826087 0.76751947 0.77595628 0.77196885
|
|
0.77257889 0.78091106 0.76633444 0.763596 ]
|
|
|
|
mean value: 0.7734975834149308
|
|
|
|
key: test_precision
|
|
value: [0.64285714 0.640625 0.63934426 0.72222222 0.7 0.64912281
|
|
0.75 0.71153846 0.63333333 0.70175439]
|
|
|
|
mean value: 0.6790797615228699
|
|
|
|
key: train_precision
|
|
value: [0.6889313 0.69667319 0.69245648 0.69556452 0.69335938 0.69959677
|
|
0.68932039 0.69498069 0.692 0.69076305]
|
|
|
|
mean value: 0.6933645768085946
|
|
|
|
key: test_recall
|
|
value: [0.8 0.91111111 0.86666667 0.86666667 0.93333333 0.82222222
|
|
0.95454545 0.84090909 0.84444444 0.88888889]
|
|
|
|
mean value: 0.8728787878787879
|
|
|
|
key: train_recall
|
|
value: [0.89578164 0.88337469 0.88833747 0.8560794 0.8808933 0.86104218
|
|
0.87871287 0.89108911 0.85856079 0.85359801]
|
|
|
|
mean value: 0.8747469473994546
|
|
|
|
key: test_roc_auc
|
|
value: [0.67777778 0.7 0.68888889 0.76666667 0.76666667 0.68888889
|
|
0.82171717 0.75378788 0.67222222 0.75126263]
|
|
|
|
mean value: 0.7287878787878788
|
|
|
|
key: train_roc_auc
|
|
value: [0.74565757 0.74937965 0.74689826 0.74069479 0.74565757 0.74565757
|
|
0.74084527 0.74951478 0.73868634 0.73620495]
|
|
|
|
mean value: 0.7439196742254871
|
|
|
|
key: test_jcc
|
|
value: [0.55384615 0.60294118 0.58208955 0.65 0.66666667 0.56923077
|
|
0.72413793 0.62711864 0.56716418 0.64516129]
|
|
|
|
mean value: 0.6188356362982321
|
|
|
|
key: train_jcc
|
|
value: [0.63780919 0.63799283 0.63701068 0.62274368 0.63392857 0.62862319
|
|
0.62943262 0.6405694 0.62118492 0.61759425]
|
|
|
|
mean value: 0.6306889330400278
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01535296 0.01102614 0.01215005 0.01115704 0.01218557 0.01206779
|
|
0.01263738 0.01287818 0.01261473 0.01268816]
|
|
|
|
mean value: 0.012475800514221192
|
|
|
|
key: score_time
|
|
value: [0.03680086 0.01316953 0.01474047 0.01443195 0.01453042 0.01481295
|
|
0.01456666 0.01927614 0.01503778 0.01550984]
|
|
|
|
mean value: 0.017287659645080566
|
|
|
|
key: test_mcc
|
|
value: [0.79772404 0.6894997 0.79772404 0.79772404 0.77919372 0.77919372
|
|
0.83410221 0.85354573 0.83347626 0.63206237]
|
|
|
|
mean value: 0.7794245815990786
|
|
|
|
key: train_mcc
|
|
value: [0.83691215 0.84335796 0.84767335 0.83477069 0.84983659 0.84335796
|
|
0.83706734 0.85431725 0.83713362 0.85437016]
|
|
|
|
mean value: 0.8438797075182797
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 0.82222222 0.88888889 0.88888889 0.87777778 0.87777778
|
|
0.91011236 0.92134831 0.91011236 0.78651685]
|
|
|
|
mean value: 0.8772534332084894
|
|
|
|
key: train_accuracy
|
|
value: [0.91191067 0.91563275 0.91811414 0.91066998 0.91935484 0.91563275
|
|
0.91201983 0.92193309 0.91201983 0.92193309]
|
|
|
|
mean value: 0.915922096051608
|
|
|
|
key: test_fscore
|
|
value: [0.9 0.8490566 0.9 0.9 0.89108911 0.89108911
|
|
0.91666667 0.92631579 0.91836735 0.82568807]
|
|
|
|
mean value: 0.8918272698068989
|
|
|
|
key: train_fscore
|
|
value: [0.91904219 0.9221968 0.92431193 0.91799544 0.92537313 0.9221968
|
|
0.91922639 0.92766935 0.91904219 0.92750288]
|
|
|
|
mean value: 0.922455709244472
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.73770492 0.81818182 0.81818182 0.80357143 0.80357143
|
|
0.84615385 0.8627451 0.8490566 0.703125 ]
|
|
|
|
mean value: 0.8060473777687746
|
|
|
|
key: train_precision
|
|
value: [0.85021097 0.85562633 0.85927505 0.84842105 0.86111111 0.85562633
|
|
0.85052632 0.86509636 0.85021097 0.86480687]
|
|
|
|
mean value: 0.8560911354388981
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.88888889 0.82222222 0.88888889 0.88888889 0.87777778 0.87777778
|
|
0.91111111 0.92222222 0.90909091 0.78409091]
|
|
|
|
mean value: 0.8770959595959595
|
|
|
|
key: train_roc_auc
|
|
value: [0.91191067 0.91563275 0.91811414 0.91066998 0.91935484 0.91563275
|
|
0.91191067 0.92183623 0.91212871 0.9220297 ]
|
|
|
|
mean value: 0.9159220450581038
|
|
|
|
key: test_jcc
|
|
value: [0.81818182 0.73770492 0.81818182 0.81818182 0.80357143 0.80357143
|
|
0.84615385 0.8627451 0.8490566 0.703125 ]
|
|
|
|
mean value: 0.8060473777687746
|
|
|
|
key: train_jcc
|
|
value: [0.85021097 0.85562633 0.85927505 0.84842105 0.86111111 0.85562633
|
|
0.85052632 0.86509636 0.85021097 0.86480687]
|
|
|
|
mean value: 0.8560911354388981
|
|
|
|
MCC on Blind test: -0.09
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03907251 0.03950047 0.03794765 0.03909898 0.03857589 0.03941894
|
|
0.04615736 0.03867316 0.03909087 0.03874469]
|
|
|
|
mean value: 0.03962805271148682
|
|
|
|
key: score_time
|
|
value: [0.01733708 0.01737952 0.01672649 0.01740146 0.0170269 0.0170207
|
|
0.01870465 0.01699853 0.01708412 0.01693702]
|
|
|
|
mean value: 0.017261648178100587
|
|
|
|
key: test_mcc
|
|
value: [0.66683134 0.82962978 0.8675239 0.82548988 0.89087081 0.80498447
|
|
0.87330789 0.84285003 0.77592401 0.66413681]
|
|
|
|
mean value: 0.8041548917576528
|
|
|
|
key: train_mcc
|
|
value: [0.84957998 0.84957998 0.88198009 0.87278605 0.8581532 0.85454828
|
|
0.8650896 0.85710293 0.84785647 0.8750372 ]
|
|
|
|
mean value: 0.8611713765619706
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.91111111 0.93333333 0.91111111 0.94444444 0.9
|
|
0.93258427 0.92134831 0.88764045 0.83146067]
|
|
|
|
mean value: 0.9006367041198502
|
|
|
|
key: train_accuracy
|
|
value: [0.92431762 0.92431762 0.94044665 0.93548387 0.9280397 0.92679901
|
|
0.93184634 0.92812887 0.92317224 0.93680297]
|
|
|
|
mean value: 0.9299354900206321
|
|
|
|
key: test_fscore
|
|
value: [0.83146067 0.91666667 0.93478261 0.91489362 0.94623656 0.90526316
|
|
0.93617021 0.91954023 0.89130435 0.83870968]
|
|
|
|
mean value: 0.9035027751471878
|
|
|
|
key: train_fscore
|
|
value: [0.92606061 0.92606061 0.94188862 0.9375 0.93045564 0.92848485
|
|
0.93381468 0.92978208 0.9253012 0.9384801 ]
|
|
|
|
mean value: 0.931782838070503
|
|
|
|
key: test_precision
|
|
value: [0.84090909 0.8627451 0.91489362 0.87755102 0.91666667 0.86
|
|
0.88 0.93023256 0.87234043 0.8125 ]
|
|
|
|
mean value: 0.8767838476715863
|
|
|
|
key: train_precision
|
|
value: [0.90521327 0.90521327 0.91962175 0.90909091 0.90023202 0.90758294
|
|
0.90866511 0.90995261 0.89929742 0.91314554]
|
|
|
|
mean value: 0.9078014831549542
|
|
|
|
key: test_recall
|
|
value: [0.82222222 0.97777778 0.95555556 0.95555556 0.97777778 0.95555556
|
|
1. 0.90909091 0.91111111 0.86666667]
|
|
|
|
mean value: 0.9331313131313131
|
|
|
|
key: train_recall
|
|
value: [0.94789082 0.94789082 0.96526055 0.96774194 0.96277916 0.95037221
|
|
0.96039604 0.95049505 0.9528536 0.96526055]
|
|
|
|
mean value: 0.9570940716900473
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.91111111 0.93333333 0.91111111 0.94444444 0.9
|
|
0.93333333 0.92121212 0.88737374 0.83106061]
|
|
|
|
mean value: 0.9006313131313132
|
|
|
|
key: train_roc_auc
|
|
value: [0.92431762 0.92431762 0.94044665 0.93548387 0.9280397 0.92679901
|
|
0.93181092 0.92810112 0.92320898 0.93683819]
|
|
|
|
mean value: 0.9299363683266589
|
|
|
|
key: test_jcc
|
|
value: [0.71153846 0.84615385 0.87755102 0.84313725 0.89795918 0.82692308
|
|
0.88 0.85106383 0.80392157 0.72222222]
|
|
|
|
mean value: 0.8260470464235885
|
|
|
|
key: train_jcc
|
|
value: [0.86230248 0.86230248 0.89016018 0.88235294 0.86995516 0.86651584
|
|
0.8758465 0.86877828 0.86098655 0.88409091]
|
|
|
|
mean value: 0.8723291322285298
|
|
|
|
MCC on Blind test: -0.09
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.13466215 1.38615632 0.91014862 1.89251709 2.69656277 2.08642912
|
|
1.97516394 1.68166924 1.6152091 2.55937052]
|
|
|
|
mean value: 1.8937888860702514
|
|
|
|
key: score_time
|
|
value: [0.01254797 0.01260066 0.01293921 0.01290536 0.01264477 0.01261783
|
|
0.01299095 0.01266861 0.01289272 0.01266623]
|
|
|
|
mean value: 0.012747430801391601
|
|
|
|
key: test_mcc
|
|
value: [0.91473203 0.85485041 0.82548988 0.95650071 0.93541435 0.93541435
|
|
0.95603853 0.9347507 0.87294449 0.87294449]
|
|
|
|
mean value: 0.9059079945934088
|
|
|
|
key: train_mcc
|
|
value: [0.95866971 0.94211879 0.85846132 0.97306727 0.98522086 0.98522086
|
|
0.98036568 0.98036568 0.92361012 0.9901355 ]
|
|
|
|
mean value: 0.9577235792283715
|
|
|
|
key: test_accuracy
|
|
value: [0.95555556 0.92222222 0.91111111 0.97777778 0.96666667 0.96666667
|
|
0.97752809 0.96629213 0.93258427 0.93258427]
|
|
|
|
mean value: 0.9508988764044943
|
|
|
|
key: train_accuracy
|
|
value: [0.97890819 0.97022333 0.9280397 0.98635236 0.99255583 0.99255583
|
|
0.99008674 0.99008674 0.96034696 0.99504337]
|
|
|
|
mean value: 0.9784199052336718
|
|
|
|
key: test_fscore
|
|
value: [0.95744681 0.92783505 0.91489362 0.97826087 0.96774194 0.96774194
|
|
0.97777778 0.96703297 0.9375 0.9375 ]
|
|
|
|
mean value: 0.9533730962422011
|
|
|
|
key: train_fscore
|
|
value: [0.97934386 0.97108434 0.93062201 0.98653611 0.99261084 0.99261084
|
|
0.99019608 0.99019608 0.96181384 0.99506173]
|
|
|
|
mean value: 0.9790075721159184
|
|
|
|
key: test_precision
|
|
value: [0.91836735 0.86538462 0.87755102 0.95744681 0.9375 0.9375
|
|
0.95652174 0.93617021 0.88235294 0.88235294]
|
|
|
|
mean value: 0.9151147625491526
|
|
|
|
key: train_precision
|
|
value: [0.95952381 0.94379391 0.89838337 0.97342995 0.98533007 0.98533007
|
|
0.98058252 0.98058252 0.92643678 0.99017199]
|
|
|
|
mean value: 0.9623565011070279
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.95555556 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9955555555555555
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.96526055 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9965260545905708
|
|
|
|
key: test_roc_auc
|
|
value: [0.95555556 0.92222222 0.91111111 0.97777778 0.96666667 0.96666667
|
|
0.97777778 0.96666667 0.93181818 0.93181818]
|
|
|
|
mean value: 0.9508080808080809
|
|
|
|
key: train_roc_auc
|
|
value: [0.97890819 0.97022333 0.9280397 0.98635236 0.99255583 0.99255583
|
|
0.99007444 0.99007444 0.96039604 0.9950495 ]
|
|
|
|
mean value: 0.9784229663661155
|
|
|
|
key: test_jcc
|
|
value: [0.91836735 0.86538462 0.84313725 0.95744681 0.9375 0.9375
|
|
0.95652174 0.93617021 0.88235294 0.88235294]
|
|
|
|
mean value: 0.9116733859985323
|
|
|
|
key: train_jcc
|
|
value: [0.95952381 0.94379391 0.87024609 0.97342995 0.98533007 0.98533007
|
|
0.98058252 0.98058252 0.92643678 0.99017199]
|
|
|
|
mean value: 0.9595427724256984
|
|
|
|
MCC on Blind test: -0.05
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04402685 0.03535295 0.03534961 0.03315139 0.03281832 0.03226852
|
|
0.02961707 0.03274298 0.03354883 0.03338313]
|
|
|
|
mean value: 0.034225964546203615
|
|
|
|
key: score_time
|
|
value: [0.01150918 0.00930882 0.0091114 0.0088923 0.00888538 0.00895286
|
|
0.00891876 0.00898528 0.00899959 0.00901103]
|
|
|
|
mean value: 0.00925745964050293
|
|
|
|
key: test_mcc
|
|
value: [0.91473203 0.95650071 0.87447463 0.87447463 1. 0.93541435
|
|
0.89341253 0.95603853 0.95599503 0.97776667]
|
|
|
|
mean value: 0.9338809115019777
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.95555556 0.97777778 0.93333333 0.93333333 1. 0.96666667
|
|
0.94382022 0.97752809 0.97752809 0.98876404]
|
|
|
|
mean value: 0.9654307116104869
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95744681 0.97826087 0.9375 0.9375 1. 0.96774194
|
|
0.94623656 0.97777778 0.97826087 0.98901099]
|
|
|
|
mean value: 0.9669735809053496
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[0.91836735 0.95744681 0.88235294 0.88235294 1. 0.9375
|
|
0.89795918 0.95652174 0.95744681 0.97826087]
|
|
|
|
mean value: 0.9368208638682115
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95555556 0.97777778 0.93333333 0.93333333 1. 0.96666667
|
|
0.94444444 0.97777778 0.97727273 0.98863636]
|
|
|
|
mean value: 0.965479797979798
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.91836735 0.95744681 0.88235294 0.88235294 1. 0.9375
|
|
0.89795918 0.95652174 0.95744681 0.97826087]
|
|
|
|
mean value: 0.9368208638682115
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.13578272 0.13558602 0.13673377 0.13806367 0.13704205 0.13497138
|
|
0.13595009 0.13633823 0.13617516 0.13594556]
|
|
|
|
mean value: 0.136258864402771
|
|
|
|
key: score_time
|
|
value: [0.0181489 0.01821208 0.01828361 0.01828337 0.01825523 0.01821113
|
|
0.0180285 0.01819825 0.01823139 0.01823854]
|
|
|
|
mean value: 0.018209099769592285
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 1. 1. 1. 0.97801929
|
|
0.97777778 0.97777778 1. 1. ]
|
|
|
|
mean value: 0.9933574849399207
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 1. 1. 1. 0.98888889
|
|
0.98876404 0.98876404 1. 1. ]
|
|
|
|
mean value: 0.9966416978776529
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 1. 1. 1. 0.98901099
|
|
0.98876404 0.98876404 1. 1. ]
|
|
|
|
mean value: 0.996653907889863
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 0.97826087
|
|
0.97777778 0.97777778 1. 1. ]
|
|
|
|
mean value: 0.9933816425120773
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 1. 1. 1. 0.98888889
|
|
0.98888889 0.98888889 1. 1. ]
|
|
|
|
mean value: 0.9966666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 1. 1. 1. 0.97826087
|
|
0.97777778 0.97777778 1. 1. ]
|
|
|
|
mean value: 0.9933816425120773
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01168323 0.0116694 0.01215816 0.01272511 0.01220274 0.01303887
|
|
0.01297855 0.01308727 0.01302576 0.01163054]
|
|
|
|
mean value: 0.012419962882995605
|
|
|
|
key: score_time
|
|
value: [0.00913024 0.00893116 0.00980639 0.00893188 0.00984216 0.00985599
|
|
0.00990105 0.00991845 0.00986791 0.00895834]
|
|
|
|
mean value: 0.009514355659484863
|
|
|
|
key: test_mcc
|
|
value: [0.93541435 0.97801929 0.93541435 0.87447463 0.95650071 0.89442719
|
|
0.91388467 0.91388467 1. 0.93465477]
|
|
|
|
mean value: 0.9336674633093849
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96666667 0.98888889 0.96666667 0.93333333 0.97777778 0.94444444
|
|
0.95505618 0.95505618 1. 0.96629213]
|
|
|
|
mean value: 0.96541822721598
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96774194 0.98901099 0.96774194 0.9375 0.97826087 0.94736842
|
|
0.95652174 0.95652174 1. 0.96774194]
|
|
|
|
mean value: 0.966840956434132
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9375 0.97826087 0.9375 0.88235294 0.95744681 0.9
|
|
0.91666667 0.91666667 1. 0.9375 ]
|
|
|
|
mean value: 0.936389395258566
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96666667 0.98888889 0.96666667 0.93333333 0.97777778 0.94444444
|
|
0.95555556 0.95555556 1. 0.96590909]
|
|
|
|
mean value: 0.965479797979798
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9375 0.97826087 0.9375 0.88235294 0.95744681 0.9
|
|
0.91666667 0.91666667 1. 0.9375 ]
|
|
|
|
mean value: 0.936389395258566
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.00036168 1.91632915 1.94636726 1.94240832 1.93890262 1.93303847
|
|
1.93761015 1.92943025 1.92687869 1.93970942]
|
|
|
|
mean value: 1.9411036014556884
|
|
|
|
key: score_time
|
|
value: [0.09515929 0.09486294 0.09474182 0.09475875 0.09536195 0.09506249
|
|
0.09508657 0.09513164 0.09504128 0.09470463]
|
|
|
|
mean value: 0.094991135597229
|
|
|
|
key: test_mcc
|
|
value: [0.97801929 1. 0.97801929 0.97801929 0.97801929 0.95650071
|
|
0.97777778 0.97777778 1. 1. ]
|
|
|
|
mean value: 0.9824133445525439
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98888889 1. 0.98888889 0.98888889 0.98888889 0.97777778
|
|
0.98876404 0.98876404 1. 1. ]
|
|
|
|
mean value: 0.9910861423220974
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98901099 1. 0.98901099 0.98901099 0.98901099 0.97826087
|
|
0.98876404 0.98876404 1. 1. ]
|
|
|
|
mean value: 0.9911832915496814
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.97826087 1. 0.97826087 0.97826087 0.97826087 0.95744681
|
|
0.97777778 0.97777778 1. 1. ]
|
|
|
|
mean value: 0.9826045842327064
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98888889 1. 0.98888889 0.98888889 0.98888889 0.97777778
|
|
0.98888889 0.98888889 1. 1. ]
|
|
|
|
mean value: 0.991111111111111
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.97826087 1. 0.97826087 0.97826087 0.97826087 0.95744681
|
|
0.97777778 0.97777778 1. 1. ]
|
|
|
|
mean value: 0.9826045842327064
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.03961277 1.08452988 1.08938456 1.06229687 1.0555737 1.06858301
|
|
1.04412293 1.06482911 1.06183457 1.10514545]
|
|
|
|
mean value: 1.0675912857055665
|
|
|
|
key: score_time
|
|
value: [0.24056244 0.25116324 0.26242518 0.27658844 0.2851882 0.18325329
|
|
0.25878429 0.25916886 0.28672481 0.35383105]
|
|
|
|
mean value: 0.2657689809799194
|
|
|
|
key: test_mcc
|
|
value: [0.95650071 0.97801929 0.97801929 0.97801929 0.97801929 0.93541435
|
|
0.95603853 0.91388467 1. 1. ]
|
|
|
|
mean value: 0.9673915429284201
|
|
|
|
key: train_mcc
|
|
value: [0.98277854 0.98766907 0.98277854 0.98766907 0.98766907 0.98766907
|
|
0.98768405 0.98768405 0.98768443 0.98280004]
|
|
|
|
mean value: 0.9862085941756387
|
|
|
|
key: test_accuracy
|
|
value: [0.97777778 0.98888889 0.98888889 0.98888889 0.98888889 0.96666667
|
|
0.97752809 0.95505618 1. 1. ]
|
|
|
|
mean value: 0.9832584269662922
|
|
|
|
key: train_accuracy
|
|
value: [0.99131514 0.99379653 0.99131514 0.99379653 0.99379653 0.99379653
|
|
0.99380421 0.99380421 0.99380421 0.9913259 ]
|
|
|
|
mean value: 0.9930554914965516
|
|
|
|
key: test_fscore
|
|
value: [0.97826087 0.98901099 0.98901099 0.98901099 0.98901099 0.96774194
|
|
0.97777778 0.95652174 1. 1. ]
|
|
|
|
mean value: 0.9836346278001257
|
|
|
|
key: train_fscore
|
|
value: [0.99138991 0.99383477 0.99138991 0.99383477 0.99383477 0.99383477
|
|
0.99384994 0.99384994 0.99383477 0.99138991]
|
|
|
|
mean value: 0.9931043478128986
|
|
|
|
key: test_precision
|
|
value: [0.95744681 0.97826087 0.97826087 0.97826087 0.97826087 0.9375
|
|
0.95652174 0.91666667 1. 1. ]
|
|
|
|
mean value: 0.9681178692568609
|
|
|
|
key: train_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.98292683 0.9877451 0.98292683 0.9877451 0.9877451 0.9877451
|
|
0.98777506 0.98777506 0.9877451 0.98292683]
|
|
|
|
mean value: 0.9863056100250346
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97777778 0.98888889 0.98888889 0.98888889 0.98888889 0.96666667
|
|
0.97777778 0.95555556 1. 1. ]
|
|
|
|
mean value: 0.9833333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.99131514 0.99379653 0.99131514 0.99379653 0.99379653 0.99379653
|
|
0.99379653 0.99379653 0.99381188 0.99133663]
|
|
|
|
mean value: 0.9930557944131883
|
|
|
|
key: test_jcc
|
|
value: [0.95744681 0.97826087 0.97826087 0.97826087 0.97826087 0.9375
|
|
0.95652174 0.91666667 1. 1. ]
|
|
|
|
mean value: 0.9681178692568609
|
|
|
|
key: train_jcc
|
|
value: [0.98292683 0.9877451 0.98292683 0.9877451 0.9877451 0.9877451
|
|
0.98777506 0.98777506 0.9877451 0.98292683]
|
|
|
|
mean value: 0.9863056100250346
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02743244 0.01602912 0.01588845 0.01588988 0.0234673 0.0158751
|
|
0.0244658 0.0163517 0.01643538 0.01675177]
|
|
|
|
mean value: 0.018858695030212404
|
|
|
|
key: score_time
|
|
value: [0.01356125 0.01220489 0.01230812 0.01212144 0.01219606 0.01241279
|
|
0.01241374 0.01228118 0.01232433 0.01240945]
|
|
|
|
mean value: 0.012423324584960937
|
|
|
|
key: test_mcc
|
|
value: [0.3666794 0.4412613 0.40418948 0.54433105 0.56568542 0.39197153
|
|
0.66601782 0.51490953 0.36743215 0.523574 ]
|
|
|
|
mean value: 0.47860516912740225
|
|
|
|
key: train_mcc
|
|
value: [0.51508036 0.51769575 0.51482412 0.49474344 0.5103364 0.50494434
|
|
0.50120602 0.52042203 0.49163928 0.48592392]
|
|
|
|
mean value: 0.5056815658919893
|
|
|
|
key: test_accuracy
|
|
value: [0.67777778 0.7 0.68888889 0.76666667 0.76666667 0.68888889
|
|
0.82022472 0.75280899 0.6741573 0.75280899]
|
|
|
|
mean value: 0.7288888888888889
|
|
|
|
key: train_accuracy
|
|
value: [0.74565757 0.74937965 0.74689826 0.74069479 0.74565757 0.74565757
|
|
0.74101611 0.74969021 0.73853779 0.73605948]
|
|
|
|
mean value: 0.7439249002985662
|
|
|
|
key: test_fscore
|
|
value: [0.71287129 0.75229358 0.73584906 0.78787879 0.8 0.7254902
|
|
0.84 0.77083333 0.72380952 0.78431373]
|
|
|
|
mean value: 0.763333948830441
|
|
|
|
key: train_fscore
|
|
value: [0.77885653 0.77899344 0.77826087 0.76751947 0.77595628 0.77196885
|
|
0.77257889 0.78091106 0.76633444 0.763596 ]
|
|
|
|
mean value: 0.7734975834149308
|
|
|
|
key: test_precision
|
|
value: [0.64285714 0.640625 0.63934426 0.72222222 0.7 0.64912281
|
|
0.75 0.71153846 0.63333333 0.70175439]
|
|
|
|
mean value: 0.6790797615228699
|
|
|
|
key: train_precision
|
|
value: [0.6889313 0.69667319 0.69245648 0.69556452 0.69335938 0.69959677
|
|
0.68932039 0.69498069 0.692 0.69076305]
|
|
|
|
mean value: 0.6933645768085946
|
|
|
|
key: test_recall
|
|
value: [0.8 0.91111111 0.86666667 0.86666667 0.93333333 0.82222222
|
|
0.95454545 0.84090909 0.84444444 0.88888889]
|
|
|
|
mean value: 0.8728787878787879
|
|
|
|
key: train_recall
|
|
value: [0.89578164 0.88337469 0.88833747 0.8560794 0.8808933 0.86104218
|
|
0.87871287 0.89108911 0.85856079 0.85359801]
|
|
|
|
mean value: 0.8747469473994546
|
|
|
|
key: test_roc_auc
|
|
value: [0.67777778 0.7 0.68888889 0.76666667 0.76666667 0.68888889
|
|
0.82171717 0.75378788 0.67222222 0.75126263]
|
|
|
|
mean value: 0.7287878787878788
|
|
|
|
key: train_roc_auc
|
|
value: [0.74565757 0.74937965 0.74689826 0.74069479 0.74565757 0.74565757
|
|
0.74084527 0.74951478 0.73868634 0.73620495]
|
|
|
|
mean value: 0.7439196742254871
|
|
|
|
key: test_jcc
|
|
value: [0.55384615 0.60294118 0.58208955 0.65 0.66666667 0.56923077
|
|
0.72413793 0.62711864 0.56716418 0.64516129]
|
|
|
|
mean value: 0.6188356362982321
|
|
|
|
key: train_jcc
|
|
value: [0.63780919 0.63799283 0.63701068 0.62274368 0.63392857 0.62862319
|
|
0.62943262 0.6405694 0.62118492 0.61759425]
|
|
|
|
mean value: 0.6306889330400278
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.11954927 0.10243893 0.12050772 0.09531236 0.09984493 0.10098267
|
|
0.09905386 0.09511495 0.09808731 0.09488583]
|
|
|
|
mean value: 0.1025777816772461
|
|
|
|
key: score_time
|
|
value: [0.01177788 0.01129889 0.011235 0.01115942 0.01139808 0.01177025
|
|
0.0112772 0.01129413 0.01119161 0.01108813]
|
|
|
|
mean value: 0.011349058151245118
|
|
|
|
key: test_mcc
|
|
value: [0.89442719 1. 0.95650071 0.95650071 0.93541435 0.91473203
|
|
0.91388467 0.91388467 0.95599503 0.95599503]
|
|
|
|
mean value: 0.939733439719171
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.94444444 1. 0.97777778 0.97777778 0.96666667 0.95555556
|
|
0.95505618 0.95505618 0.97752809 0.97752809]
|
|
|
|
mean value: 0.9687390761548065
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.94736842 1. 0.97826087 0.97826087 0.96774194 0.95744681
|
|
0.95652174 0.95652174 0.97826087 0.97826087]
|
|
|
|
mean value: 0.969864412156888
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9 1. 0.95744681 0.95744681 0.9375 0.91836735
|
|
0.91666667 0.91666667 0.95744681 0.95744681]
|
|
|
|
mean value: 0.9418987914314663
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.94444444 1. 0.97777778 0.97777778 0.96666667 0.95555556
|
|
0.95555556 0.95555556 0.97727273 0.97727273]
|
|
|
|
mean value: 0.9687878787878788
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9 1. 0.95744681 0.95744681 0.9375 0.91836735
|
|
0.91666667 0.91666667 0.95744681 0.95744681]
|
|
|
|
mean value: 0.9418987914314663
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.05254674 0.07882881 0.08757758 0.06172037 0.08176923 0.08085275
|
|
0.12069535 0.05918574 0.05596089 0.05721736]
|
|
|
|
mean value: 0.07363548278808593
|
|
|
|
key: score_time
|
|
value: [0.01891685 0.01233602 0.01903749 0.01259184 0.01906371 0.01240969
|
|
0.02544355 0.0122056 0.01293063 0.01262069]
|
|
|
|
mean value: 0.015755605697631837
|
|
|
|
key: test_mcc
|
|
value: [0.73624773 0.85485041 0.85485041 0.87447463 0.85485041 0.87011096
|
|
0.85354573 0.82801395 0.83347626 0.71626222]
|
|
|
|
mean value: 0.8276682717583613
|
|
|
|
key: train_mcc
|
|
value: [0.87836767 0.89329743 0.87030254 0.90068638 0.87258194 0.87756812
|
|
0.89628551 0.87998156 0.89182611 0.89453265]
|
|
|
|
mean value: 0.8855429902876233
|
|
|
|
key: test_accuracy
|
|
value: [0.86666667 0.92222222 0.92222222 0.93333333 0.92222222 0.93333333
|
|
0.92134831 0.91011236 0.91011236 0.85393258]
|
|
|
|
mean value: 0.9095505617977528
|
|
|
|
key: train_accuracy
|
|
value: [0.93796526 0.94540943 0.93300248 0.94789082 0.93424318 0.93672457
|
|
0.94547708 0.93804213 0.94299876 0.94547708]
|
|
|
|
mean value: 0.9407230775380433
|
|
|
|
key: test_fscore
|
|
value: [0.87234043 0.92783505 0.92783505 0.9375 0.92783505 0.93617021
|
|
0.92631579 0.91489362 0.91836735 0.86597938]
|
|
|
|
mean value: 0.9155071927814082
|
|
|
|
key: train_fscore
|
|
value: [0.94019139 0.94736842 0.93617021 0.9504717 0.93727811 0.93964497
|
|
0.94835681 0.94089835 0.94600939 0.94774347]
|
|
|
|
mean value: 0.9434132806684936
|
|
|
|
key: test_precision
|
|
value: [0.83673469 0.86538462 0.86538462 0.88235294 0.86538462 0.89795918
|
|
0.8627451 0.86 0.8490566 0.80769231]
|
|
|
|
mean value: 0.8592694674386445
|
|
|
|
key: train_precision
|
|
value: [0.90762125 0.91454965 0.89390519 0.90561798 0.8959276 0.89819005
|
|
0.90178571 0.90045249 0.89755011 0.90888383]
|
|
|
|
mean value: 0.9024483858364686
|
|
|
|
key: test_recall
|
|
value: [0.91111111 1. 1. 1. 1. 0.97777778
|
|
1. 0.97727273 1. 0.93333333]
|
|
|
|
mean value: 0.9799494949494949
|
|
|
|
key: train_recall
|
|
value: [0.9751861 0.98263027 0.98263027 1. 0.98263027 0.98511166
|
|
1. 0.98514851 1. 0.99007444]
|
|
|
|
mean value: 0.988341154214677
|
|
|
|
key: test_roc_auc
|
|
value: [0.86666667 0.92222222 0.92222222 0.93333333 0.92222222 0.93333333
|
|
0.92222222 0.91085859 0.90909091 0.8530303 ]
|
|
|
|
mean value: 0.909520202020202
|
|
|
|
key: train_roc_auc
|
|
value: [0.93796526 0.94540943 0.93300248 0.94789082 0.93424318 0.93672457
|
|
0.94540943 0.93798369 0.94306931 0.94553227]
|
|
|
|
mean value: 0.9407230425275779
|
|
|
|
key: test_jcc
|
|
value: [0.77358491 0.86538462 0.86538462 0.88235294 0.86538462 0.88
|
|
0.8627451 0.84313725 0.8490566 0.76363636]
|
|
|
|
mean value: 0.8450667013341819
|
|
|
|
key: train_jcc
|
|
value: [0.88713318 0.9 0.88 0.90561798 0.88195991 0.88616071
|
|
0.90178571 0.88839286 0.89755011 0.9006772 ]
|
|
|
|
mean value: 0.8929277669261269
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01548171 0.02174878 0.01553392 0.01638222 0.01600909 0.01596785
|
|
0.01603436 0.01590562 0.01601791 0.0156517 ]
|
|
|
|
mean value: 0.01647331714630127
|
|
|
|
key: score_time
|
|
value: [0.01279306 0.02502251 0.01233053 0.01239634 0.01235223 0.01235175
|
|
0.01235223 0.01238799 0.01236486 0.01236844]
|
|
|
|
mean value: 0.01367199420928955
|
|
|
|
key: test_mcc
|
|
value: [0.24840131 0.11691309 0.1833397 0.35564338 0.38118125 0.1833397
|
|
0.37139974 0.30353535 0.26764796 0.35147059]
|
|
|
|
mean value: 0.2762872081246453
|
|
|
|
key: train_mcc
|
|
value: [0.32357917 0.31884126 0.3472553 0.27920398 0.30396993 0.3330531
|
|
0.2511237 0.28836601 0.30707072 0.25829293]
|
|
|
|
mean value: 0.3010756099753178
|
|
|
|
key: test_accuracy
|
|
value: [0.62222222 0.55555556 0.58888889 0.67777778 0.68888889 0.58888889
|
|
0.68539326 0.65168539 0.62921348 0.6741573 ]
|
|
|
|
mean value: 0.6362671660424469
|
|
|
|
key: train_accuracy
|
|
value: [0.65880893 0.65632754 0.66873449 0.63771712 0.64888337 0.66377171
|
|
0.62453532 0.64188352 0.65055762 0.62825279]
|
|
|
|
mean value: 0.6479472420292662
|
|
|
|
key: test_fscore
|
|
value: [0.65306122 0.61538462 0.63366337 0.68131868 0.70833333 0.63366337
|
|
0.68888889 0.65168539 0.67961165 0.70103093]
|
|
|
|
mean value: 0.6646641447667497
|
|
|
|
key: train_fscore
|
|
value: [0.68856172 0.68700565 0.7036626 0.66513761 0.68094701 0.69169511
|
|
0.64808362 0.67196368 0.68099548 0.64788732]
|
|
|
|
mean value: 0.6765939803782958
|
|
|
|
key: test_precision
|
|
value: [0.60377358 0.54237288 0.57142857 0.67391304 0.66666667 0.57142857
|
|
0.67391304 0.64444444 0.60344828 0.65384615]
|
|
|
|
mean value: 0.6205235236894591
|
|
|
|
key: train_precision
|
|
value: [0.63333333 0.63070539 0.63654618 0.61833689 0.62396694 0.63865546
|
|
0.61050328 0.62054507 0.62577963 0.61469933]
|
|
|
|
mean value: 0.6253071516869549
|
|
|
|
key: test_recall
|
|
value: [0.71111111 0.71111111 0.71111111 0.68888889 0.75555556 0.71111111
|
|
0.70454545 0.65909091 0.77777778 0.75555556]
|
|
|
|
mean value: 0.7185858585858586
|
|
|
|
key: train_recall
|
|
value: [0.75434243 0.75434243 0.7866005 0.71960298 0.74937965 0.75434243
|
|
0.69059406 0.73267327 0.74689826 0.68486352]
|
|
|
|
mean value: 0.7373639535169398
|
|
|
|
key: test_roc_auc
|
|
value: [0.62222222 0.55555556 0.58888889 0.67777778 0.68888889 0.58888889
|
|
0.68560606 0.65176768 0.62752525 0.67323232]
|
|
|
|
mean value: 0.6360353535353536
|
|
|
|
key: train_roc_auc
|
|
value: [0.65880893 0.65632754 0.66873449 0.63771712 0.64888337 0.66377171
|
|
0.62445336 0.64177088 0.65067685 0.62832285]
|
|
|
|
mean value: 0.6479467115446036
|
|
|
|
key: test_jcc
|
|
value: [0.48484848 0.44444444 0.46376812 0.51666667 0.5483871 0.46376812
|
|
0.52542373 0.48333333 0.51470588 0.53968254]
|
|
|
|
mean value: 0.4985028408800221
|
|
|
|
key: train_jcc
|
|
value: [0.52504318 0.5232358 0.54280822 0.49828179 0.51623932 0.52869565
|
|
0.47938144 0.50598291 0.51629503 0.47916667]
|
|
|
|
mean value: 0.5115129994447575
|
|
|
|
MCC on Blind test: -0.08
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02986503 0.03106165 0.02707005 0.03405428 0.03290462 0.03229713
|
|
0.03070855 0.03472114 0.03327203 0.02605844]
|
|
|
|
mean value: 0.03120129108428955
|
|
|
|
key: score_time
|
|
value: [0.01246309 0.01239991 0.01267004 0.01244092 0.01238871 0.01236868
|
|
0.01241231 0.01266837 0.01246691 0.01261067]
|
|
|
|
mean value: 0.012488961219787598
|
|
|
|
key: test_mcc
|
|
value: [0.69509522 0.76088591 0.74278135 0.72486118 0.76088591 0.80178373
|
|
0.9347507 0.75907212 0.82336086 0.77614967]
|
|
|
|
mean value: 0.7779626641841877
|
|
|
|
key: train_mcc
|
|
value: [0.85163067 0.71369692 0.65838734 0.71964628 0.7012443 0.85416194
|
|
0.89644594 0.83492734 0.86240069 0.87381967]
|
|
|
|
mean value: 0.7966361094273259
|
|
|
|
key: test_accuracy
|
|
value: [0.84444444 0.86666667 0.85555556 0.84444444 0.86666667 0.9
|
|
0.96629213 0.86516854 0.91011236 0.88764045]
|
|
|
|
mean value: 0.8806991260923845
|
|
|
|
key: train_accuracy
|
|
value: [0.92555831 0.83746898 0.8101737 0.84119107 0.83498759 0.92555831
|
|
0.94795539 0.91078067 0.92812887 0.93680297]
|
|
|
|
mean value: 0.8898605871084586
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.88235294 0.83116883 0.86538462 0.88235294 0.89655172
|
|
0.96703297 0.88 0.91489362 0.88636364]
|
|
|
|
mean value: 0.8839434606795532
|
|
|
|
key: train_fscore
|
|
value: [0.92424242 0.8601921 0.77198212 0.86295503 0.85621622 0.92227979
|
|
0.94890511 0.91818182 0.93208431 0.93601004]
|
|
|
|
mean value: 0.8933048958469235
|
|
|
|
key: test_precision
|
|
value: [0.8974359 0.78947368 1. 0.76271186 0.78947368 0.92857143
|
|
0.93617021 0.78571429 0.87755102 0.90697674]
|
|
|
|
mean value: 0.8674078821909611
|
|
|
|
key: train_precision
|
|
value: [0.94087404 0.75468165 0.96641791 0.75894539 0.75862069 0.96476965
|
|
0.93301435 0.8487395 0.88248337 0.94670051]
|
|
|
|
mean value: 0.8755247045560999
|
|
|
|
key: test_recall
|
|
value: [0.77777778 1. 0.71111111 1. 1. 0.86666667
|
|
1. 1. 0.95555556 0.86666667]
|
|
|
|
mean value: 0.9177777777777778
|
|
|
|
key: train_recall
|
|
value: [0.90818859 1. 0.6426799 1. 0.98263027 0.88337469
|
|
0.96534653 1. 0.98759305 0.92555831]
|
|
|
|
mean value: 0.9295371348549247
|
|
|
|
key: test_roc_auc
|
|
value: [0.84444444 0.86666667 0.85555556 0.84444444 0.86666667 0.9
|
|
0.96666667 0.86666667 0.90959596 0.88787879]
|
|
|
|
mean value: 0.8808585858585859
|
|
|
|
key: train_roc_auc
|
|
value: [0.92555831 0.83746898 0.8101737 0.84119107 0.83498759 0.92555831
|
|
0.94793381 0.91066998 0.92820247 0.93678906]
|
|
|
|
mean value: 0.8898533277645382
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.78947368 0.71111111 0.76271186 0.78947368 0.8125
|
|
0.93617021 0.78571429 0.84313725 0.79591837]
|
|
|
|
mean value: 0.7940496178953801
|
|
|
|
key: train_jcc
|
|
value: [0.85915493 0.75468165 0.62864078 0.75894539 0.74858223 0.85576923
|
|
0.90277778 0.8487395 0.87280702 0.87971698]
|
|
|
|
mean value: 0.8109815473925681
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01734114 0.02829599 0.02954578 0.03200293 0.02520967 0.02703738
|
|
0.02638435 0.0349586 0.0289073 0.03014708]
|
|
|
|
mean value: 0.02798302173614502
|
|
|
|
key: score_time
|
|
value: [0.01065016 0.01219821 0.01217985 0.01214528 0.01207781 0.01209116
|
|
0.01213288 0.01234198 0.01292515 0.01211715]
|
|
|
|
mean value: 0.012085962295532226
|
|
|
|
key: test_mcc
|
|
value: [0.66097134 0.71477161 0.67202151 0.8230355 0.6894997 0.73624773
|
|
0.77746025 0.75007366 0.63950435 0.77818261]
|
|
|
|
mean value: 0.7241768267449343
|
|
|
|
key: train_mcc
|
|
value: [0.84222369 0.75700084 0.55007191 0.83673334 0.58976782 0.85185395
|
|
0.75189951 0.77909836 0.70864605 0.92233238]
|
|
|
|
mean value: 0.7589627863666474
|
|
|
|
key: test_accuracy
|
|
value: [0.82222222 0.84444444 0.81111111 0.91111111 0.82222222 0.86666667
|
|
0.87640449 0.86516854 0.79775281 0.88764045]
|
|
|
|
mean value: 0.8504744069912609
|
|
|
|
key: train_accuracy
|
|
value: [0.91811414 0.87096774 0.74069479 0.91811414 0.75806452 0.92431762
|
|
0.86121437 0.88104089 0.84262701 0.96034696]
|
|
|
|
mean value: 0.86755021969676
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.8627451 0.76712329 0.91304348 0.8490566 0.86046512
|
|
0.88888889 0.87755102 0.75675676 0.89361702]
|
|
|
|
mean value: 0.8469247271354378
|
|
|
|
key: train_fscore
|
|
value: [0.91292876 0.88261851 0.65793781 0.9195122 0.80519481 0.92088197
|
|
0.87826087 0.89237668 0.81934566 0.96144578]
|
|
|
|
mean value: 0.8650503044471857
|
|
|
|
key: test_precision
|
|
value: [0.91428571 0.77192982 1. 0.89361702 0.73770492 0.90243902
|
|
0.8 0.7962963 0.96551724 0.85714286]
|
|
|
|
mean value: 0.8638932897365208
|
|
|
|
key: train_precision
|
|
value: [0.97464789 0.80952381 0.96634615 0.90407674 0.67391304 0.96467391
|
|
0.78294574 0.81557377 0.96 0.93442623]
|
|
|
|
mean value: 0.8786127282258868
|
|
|
|
key: test_recall
|
|
value: [0.71111111 0.97777778 0.62222222 0.93333333 1. 0.82222222
|
|
1. 0.97727273 0.62222222 0.93333333]
|
|
|
|
mean value: 0.8599494949494949
|
|
|
|
key: train_recall
|
|
value: [0.85856079 0.97022333 0.49875931 0.93548387 1. 0.8808933
|
|
1. 0.98514851 0.7146402 0.99007444]
|
|
|
|
mean value: 0.8833783750583495
|
|
|
|
key: test_roc_auc
|
|
value: [0.82222222 0.84444444 0.81111111 0.91111111 0.82222222 0.86666667
|
|
0.87777778 0.86641414 0.79974747 0.88712121]
|
|
|
|
mean value: 0.8508838383838384
|
|
|
|
key: train_roc_auc
|
|
value: [0.91811414 0.87096774 0.74069479 0.91811414 0.75806452 0.92431762
|
|
0.86104218 0.88091173 0.84246861 0.96038376]
|
|
|
|
mean value: 0.8675079232489006
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.75862069 0.62222222 0.84 0.73770492 0.75510204
|
|
0.8 0.78181818 0.60869565 0.80769231]
|
|
|
|
mean value: 0.7378522679077577
|
|
|
|
key: train_jcc
|
|
value: [0.83980583 0.78989899 0.4902439 0.8510158 0.67391304 0.85336538
|
|
0.78294574 0.80566802 0.6939759 0.92575406]
|
|
|
|
mean value: 0.7706586663596504
|
|
|
|
MCC on Blind test: -0.08
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.22934127 0.21689129 0.2160399 0.2162447 0.2203114 0.21873331
|
|
0.23117566 0.23211575 0.21774817 0.22879505]
|
|
|
|
mean value: 0.22273964881896974
|
|
|
|
key: score_time
|
|
value: [0.01556897 0.01572156 0.01673293 0.01607418 0.01595616 0.01712942
|
|
0.01726222 0.01731992 0.0163548 0.01704502]
|
|
|
|
mean value: 0.016516518592834473
|
|
|
|
key: test_mcc
|
|
value: [0.85485041 0.93541435 0.95650071 0.95650071 0.97801929 0.91473203
|
|
0.89341253 0.85354573 0.97776667 0.97776667]
|
|
|
|
mean value: 0.9298509115975558
|
|
|
|
key: train_mcc
|
|
value: [0.99012321 0.99012321 0.98277854 0.98034206 0.98522086 0.99012321
|
|
0.99013526 0.98523875 0.9901355 0.98768443]
|
|
|
|
mean value: 0.9871905044347934
|
|
|
|
key: test_accuracy
|
|
value: [0.92222222 0.96666667 0.97777778 0.97777778 0.98888889 0.95555556
|
|
0.94382022 0.92134831 0.98876404 0.98876404]
|
|
|
|
mean value: 0.9631585518102372
|
|
|
|
key: train_accuracy
|
|
value: [0.99503722 0.99503722 0.99131514 0.99007444 0.99255583 0.99503722
|
|
0.99504337 0.99256506 0.99504337 0.99380421]
|
|
|
|
mean value: 0.9935513081873557
|
|
|
|
key: test_fscore
|
|
value: [0.92783505 0.96774194 0.97826087 0.97826087 0.98901099 0.95744681
|
|
0.94623656 0.92631579 0.98901099 0.98901099]
|
|
|
|
mean value: 0.9649130850317772
|
|
|
|
key: train_fscore
|
|
value: [0.99506173 0.99506173 0.99138991 0.99017199 0.99261084 0.99506173
|
|
0.99507389 0.99262899 0.99506173 0.99383477]
|
|
|
|
mean value: 0.9935957311230967
|
|
|
|
key: test_precision
|
|
value: [0.86538462 0.9375 0.95744681 0.95744681 0.97826087 0.91836735
|
|
0.89795918 0.8627451 0.97826087 0.97826087]
|
|
|
|
mean value: 0.9331632469753005
|
|
|
|
key: train_precision
|
|
value: [0.99017199 0.99017199 0.98292683 0.98053528 0.98533007 0.99017199
|
|
0.99019608 0.98536585 0.99017199 0.9877451 ]
|
|
|
|
mean value: 0.9872787173240365
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92222222 0.96666667 0.97777778 0.97777778 0.98888889 0.95555556
|
|
0.94444444 0.92222222 0.98863636 0.98863636]
|
|
|
|
mean value: 0.9632828282828283
|
|
|
|
key: train_roc_auc
|
|
value: [0.99503722 0.99503722 0.99131514 0.99007444 0.99255583 0.99503722
|
|
0.99503722 0.99255583 0.9950495 0.99381188]
|
|
|
|
mean value: 0.9935511510208093
|
|
|
|
key: test_jcc
|
|
value: [0.86538462 0.9375 0.95744681 0.95744681 0.97826087 0.91836735
|
|
0.89795918 0.8627451 0.97826087 0.97826087]
|
|
|
|
mean value: 0.9331632469753005
|
|
|
|
key: train_jcc
|
|
value: [0.99017199 0.99017199 0.98292683 0.98053528 0.98533007 0.99017199
|
|
0.99019608 0.98536585 0.99017199 0.9877451 ]
|
|
|
|
mean value: 0.9872787173240365
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.19848394 0.20910358 0.20186234 0.21240187 0.21274734 0.19113708
|
|
0.20238543 0.21005321 0.20346332 0.10438156]
|
|
|
|
mean value: 0.19460196495056153
|
|
|
|
key: score_time
|
|
value: [0.04009843 0.03762341 0.04046917 0.03854799 0.04065156 0.03484154
|
|
0.03711557 0.03895879 0.04041505 0.01881981]
|
|
|
|
mean value: 0.03675413131713867
|
|
|
|
key: test_mcc
|
|
value: [0.95650071 0.93541435 0.91473203 0.95650071 0.97801929 0.91473203
|
|
0.87330789 0.95603853 0.97776667 0.97776667]
|
|
|
|
mean value: 0.9440778897705091
|
|
|
|
key: train_mcc
|
|
value: [0.99752168 0.99258333 1. 0.99504947 1. 0.99752168
|
|
1. 0.99259241 0.99505562 0.99752475]
|
|
|
|
mean value: 0.9967848951804916
|
|
|
|
key: test_accuracy
|
|
value: [0.97777778 0.96666667 0.95555556 0.97777778 0.98888889 0.95555556
|
|
0.93258427 0.97752809 0.98876404 0.98876404]
|
|
|
|
mean value: 0.9709862671660424
|
|
|
|
key: train_accuracy
|
|
value: [0.99875931 0.99627792 1. 0.99751861 1. 0.99875931
|
|
1. 0.99628253 0.99752169 0.99876084]
|
|
|
|
mean value: 0.9983880192238509
|
|
|
|
key: test_fscore
|
|
value: [0.97826087 0.96774194 0.95744681 0.97826087 0.98901099 0.95744681
|
|
0.93617021 0.97777778 0.98901099 0.98901099]
|
|
|
|
mean value: 0.9720138249212285
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[0.99876084 0.99629172 1. 0.99752475 1. 0.99876084
|
|
1. 0.99630086 0.99752475 0.99876084]
|
|
|
|
mean value: 0.9983924614134052
|
|
|
|
key: test_precision
|
|
value: [0.95744681 0.9375 0.91836735 0.95744681 0.97826087 0.91836735
|
|
0.88 0.95652174 0.97826087 0.97826087]
|
|
|
|
mean value: 0.9460432658724914
|
|
|
|
key: train_precision
|
|
value: [0.99752475 0.99261084 1. 0.99506173 1. 0.99752475
|
|
1. 0.99262899 0.99506173 0.99752475]
|
|
|
|
mean value: 0.9967937544283282
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97777778 0.96666667 0.95555556 0.97777778 0.98888889 0.95555556
|
|
0.93333333 0.97777778 0.98863636 0.98863636]
|
|
|
|
mean value: 0.9710606060606061
|
|
|
|
key: train_roc_auc
|
|
value: [0.99875931 0.99627792 1. 0.99751861 1. 0.99875931
|
|
1. 0.99627792 0.99752475 0.99876238]
|
|
|
|
mean value: 0.9983880180822052
|
|
|
|
key: test_jcc
|
|
value: [0.95744681 0.9375 0.91836735 0.95744681 0.97826087 0.91836735
|
|
0.88 0.95652174 0.97826087 0.97826087]
|
|
|
|
mean value: 0.9460432658724914
|
|
|
|
key: train_jcc
|
|
value: [0.99752475 0.99261084 1. 0.99506173 1. 0.99752475
|
|
1. 0.99262899 0.99506173 0.99752475]
|
|
|
|
mean value: 0.9967937544283282
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.43886423 0.4760623 0.36390519 0.53684354 0.38435268 0.35911202
|
|
0.37094688 0.47626138 0.4179194 0.40525746]
|
|
|
|
mean value: 0.4229525089263916
|
|
|
|
key: score_time
|
|
value: [0.03428149 0.01966357 0.01971436 0.03443074 0.01980209 0.04024911
|
|
0.01956487 0.04836321 0.03411841 0.01963758]
|
|
|
|
mean value: 0.0289825439453125
|
|
|
|
key: test_mcc
|
|
value: [0.87447463 0.83553169 0.85485041 0.87447463 0.87447463 0.89442719
|
|
0.95603853 0.91388467 0.95599503 0.81417368]
|
|
|
|
mean value: 0.8848325100959881
|
|
|
|
key: train_mcc
|
|
value: [0.97306727 0.97548647 0.97306727 0.97791139 0.97548647 0.97065374
|
|
0.97309927 0.97309927 0.97310106 0.97793903]
|
|
|
|
mean value: 0.9742911211486898
|
|
|
|
key: test_accuracy
|
|
value: [0.93333333 0.91111111 0.92222222 0.93333333 0.93333333 0.94444444
|
|
0.97752809 0.95505618 0.97752809 0.8988764 ]
|
|
|
|
mean value: 0.9386766541822722
|
|
|
|
key: train_accuracy
|
|
value: [0.98635236 0.98759305 0.98635236 0.98883375 0.98759305 0.98511166
|
|
0.98636927 0.98636927 0.98636927 0.98884758]
|
|
|
|
mean value: 0.9869791618622413
|
|
|
|
key: test_fscore
|
|
value: [0.9375 0.91836735 0.92783505 0.9375 0.9375 0.94736842
|
|
0.97777778 0.95652174 0.97826087 0.90909091]
|
|
|
|
mean value: 0.9427722115102138
|
|
|
|
key: train_fscore
|
|
value: [0.98653611 0.9877451 0.98653611 0.98895706 0.9877451 0.98533007
|
|
0.98656899 0.98656899 0.98653611 0.98895706]
|
|
|
|
mean value: 0.9871480676128901
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.8490566 0.86538462 0.88235294 0.88235294 0.9
|
|
0.95652174 0.91666667 0.95744681 0.83333333]
|
|
|
|
mean value: 0.8925468590328686
|
|
|
|
key: train_precision
|
|
value: [0.97342995 0.97578692 0.97342995 0.97815534 0.97578692 0.97108434
|
|
0.97349398 0.97349398 0.97342995 0.97815534]
|
|
|
|
mean value: 0.9746246673719675
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.93333333 0.91111111 0.92222222 0.93333333 0.93333333 0.94444444
|
|
0.97777778 0.95555556 0.97727273 0.89772727]
|
|
|
|
mean value: 0.9386111111111111
|
|
|
|
key: train_roc_auc
|
|
value: [0.98635236 0.98759305 0.98635236 0.98883375 0.98759305 0.98511166
|
|
0.98635236 0.98635236 0.98638614 0.98886139]
|
|
|
|
mean value: 0.9869788467680515
|
|
|
|
key: test_jcc
|
|
value: [0.88235294 0.8490566 0.86538462 0.88235294 0.88235294 0.9
|
|
0.95652174 0.91666667 0.95744681 0.83333333]
|
|
|
|
mean value: 0.8925468590328686
|
|
|
|
key: train_jcc
|
|
value: [0.97342995 0.97578692 0.97342995 0.97815534 0.97578692 0.97108434
|
|
0.97349398 0.97349398 0.97342995 0.97815534]
|
|
|
|
mean value: 0.9746246673719675
|
|
|
|
MCC on Blind test: -0.07
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.88218331 0.86560535 0.87006712 0.86058569 0.86122131 0.86117339
|
|
0.86112857 0.85846663 0.86184955 0.86688995]
|
|
|
|
mean value: 0.8649170875549317
|
|
|
|
key: score_time
|
|
value: [0.00958872 0.00934148 0.00954866 0.00948191 0.00962186 0.0094862
|
|
0.00946331 0.00944281 0.00964046 0.00951099]
|
|
|
|
mean value: 0.009512639045715332
|
|
|
|
key: test_mcc
|
|
value: [0.93541435 0.93541435 0.91473203 0.91473203 0.95650071 0.89442719
|
|
0.87330789 0.91388467 0.97776667 0.95599503]
|
|
|
|
mean value: 0.9272174926275004
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96666667 0.96666667 0.95555556 0.95555556 0.97777778 0.94444444
|
|
0.93258427 0.95505618 0.98876404 0.97752809]
|
|
|
|
mean value: 0.9620599250936329
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96774194 0.96774194 0.95744681 0.95744681 0.97826087 0.94736842
|
|
0.93617021 0.95652174 0.98901099 0.97826087]
|
|
|
|
mean value: 0.9635970589079467
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9375 0.9375 0.91836735 0.91836735 0.95744681 0.9
|
|
0.88 0.91666667 0.97826087 0.95744681]
|
|
|
|
mean value: 0.9301555847130711
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96666667 0.96666667 0.95555556 0.95555556 0.97777778 0.94444444
|
|
0.93333333 0.95555556 0.98863636 0.97727273]
|
|
|
|
mean value: 0.9621464646464646
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9375 0.9375 0.91836735 0.91836735 0.95744681 0.9
|
|
0.88 0.91666667 0.97826087 0.95744681]
|
|
|
|
mean value: 0.9301555847130711
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03946662 0.03821182 0.03839874 0.03794003 0.04421806 0.04088974
|
|
0.03815103 0.03820896 0.04565883 0.04474592]
|
|
|
|
mean value: 0.040588974952697754
|
|
|
|
key: score_time
|
|
value: [0.01251912 0.01273608 0.01280117 0.01617122 0.01279855 0.0130713
|
|
0.01278114 0.01284242 0.01276612 0.01297808]
|
|
|
|
mean value: 0.013146519660949707
|
|
|
|
key: test_mcc
|
|
value: [0.97801929 1. 1. 1. 0.97801929 0.93541435
|
|
1. 1. 1. 0.97777778]
|
|
|
|
mean value: 0.9869230712158567
|
|
|
|
key: train_mcc
|
|
value: [0.98522086 0.97791139 1. 1. 0.98766907 0.94211879
|
|
0.93286141 1. 1. 0.95633995]
|
|
|
|
mean value: 0.9782121475544329
|
|
|
|
key: test_accuracy
|
|
value: [0.98888889 1. 1. 1. 0.98888889 0.96666667
|
|
1. 1. 1. 0.98876404]
|
|
|
|
mean value: 0.9933208489388264
|
|
|
|
key: train_accuracy
|
|
value: [0.99255583 0.98883375 1. 1. 0.99379653 0.97022333
|
|
0.96530359 1. 1. 0.97769517]
|
|
|
|
mean value: 0.9888408190123024
|
|
|
|
key: test_fscore
|
|
value: [0.98876404 1. 1. 1. 0.98876404 0.96551724
|
|
1. 1. 1. 0.98876404]
|
|
|
|
mean value: 0.9931809376210771
|
|
|
|
key: train_fscore
|
|
value: [0.9925 0.98870765 1. 1. 0.9937578 0.96930946
|
|
0.96410256 1. 1. 0.97715736]
|
|
|
|
mean value: 0.9885534843872203
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.97777778 1. 1. 1. 0.97777778 0.93333333
|
|
1. 1. 1. 0.97777778]
|
|
|
|
mean value: 0.9866666666666667
|
|
|
|
key: train_recall
|
|
value: [0.98511166 0.97766749 1. 1. 0.98759305 0.94044665
|
|
0.93069307 1. 1. 0.95533499]
|
|
|
|
mean value: 0.9776846915460777
|
|
|
|
key: test_roc_auc
|
|
value: [0.98888889 1. 1. 1. 0.98888889 0.96666667
|
|
1. 1. 1. 0.98888889]
|
|
|
|
mean value: 0.9933333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.99255583 0.98883375 1. 1. 0.99379653 0.97022333
|
|
0.96534653 1. 1. 0.97766749]
|
|
|
|
mean value: 0.9888423457730389
|
|
|
|
key: test_jcc
|
|
value: [0.97777778 1. 1. 1. 0.97777778 0.93333333
|
|
1. 1. 1. 0.97777778]
|
|
|
|
mean value: 0.9866666666666667
|
|
|
|
key: train_jcc
|
|
value: [0.98511166 0.97766749 1. 1. 0.98759305 0.94044665
|
|
0.93069307 1. 1. 0.95533499]
|
|
|
|
mean value: 0.9776846915460777
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01768303 0.01804614 0.01798201 0.01798105 0.01799393 0.01805568
|
|
0.04034901 0.01771379 0.02316856 0.04743743]
|
|
|
|
mean value: 0.023641061782836915
|
|
|
|
key: score_time
|
|
value: [0.04107428 0.01211309 0.01212001 0.01213837 0.01210546 0.01213598
|
|
0.01908278 0.01219654 0.01217341 0.018929 ]
|
|
|
|
mean value: 0.016406893730163574
|
|
|
|
key: test_mcc
|
|
value: [0.69509522 0.81649658 0.81649658 0.85485041 0.89442719 0.84970583
|
|
0.85354573 0.82801395 0.82336086 0.71626222]
|
|
|
|
mean value: 0.8148254569751476
|
|
|
|
key: train_mcc
|
|
value: [0.87141281 0.88174356 0.8748657 0.8748657 0.86802748 0.87528995
|
|
0.8750022 0.88226653 0.84329319 0.86963008]
|
|
|
|
mean value: 0.8716397190086489
|
|
|
|
key: test_accuracy
|
|
value: [0.84444444 0.9 0.9 0.92222222 0.94444444 0.92222222
|
|
0.92134831 0.91011236 0.91011236 0.85393258]
|
|
|
|
mean value: 0.9028838951310861
|
|
|
|
key: train_accuracy
|
|
value: [0.93424318 0.93920596 0.93548387 0.93548387 0.93176179 0.93548387
|
|
0.93556382 0.93928129 0.91945477 0.93184634]
|
|
|
|
mean value: 0.9337808751587382
|
|
|
|
key: test_fscore
|
|
value: [0.85416667 0.90909091 0.90909091 0.92783505 0.94736842 0.92631579
|
|
0.92631579 0.91489362 0.91489362 0.86597938]
|
|
|
|
mean value: 0.9095950151880728
|
|
|
|
key: train_fscore
|
|
value: [0.93682956 0.94173603 0.93838863 0.93838863 0.93506494 0.93853428
|
|
0.93853428 0.94201183 0.92325856 0.93552169]
|
|
|
|
mean value: 0.9368268413806814
|
|
|
|
key: test_precision
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./gid_cd_sl.py:156: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_cd_sl.py:159: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.80392157 0.83333333 0.83333333 0.86538462 0.9 0.88
|
|
0.8627451 0.86 0.87755102 0.80769231]
|
|
|
|
mean value: 0.852396127681842
|
|
|
|
key: train_precision
|
|
value: [0.90137615 0.90410959 0.89795918 0.89795918 0.89189189 0.89616253
|
|
0.89819005 0.90249433 0.88063063 0.88666667]
|
|
|
|
mean value: 0.8957440196897547
|
|
|
|
key: test_recall
|
|
value: [0.91111111 1. 1. 1. 1. 0.97777778
|
|
1. 0.97727273 0.95555556 0.93333333]
|
|
|
|
mean value: 0.9755050505050505
|
|
|
|
key: train_recall
|
|
value: [0.9751861 0.98263027 0.98263027 0.98263027 0.98263027 0.98511166
|
|
0.98267327 0.98514851 0.97022333 0.99007444]
|
|
|
|
mean value: 0.9818938407488391
|
|
|
|
key: test_roc_auc
|
|
value: [0.84444444 0.9 0.9 0.92222222 0.94444444 0.92222222
|
|
0.92222222 0.91085859 0.90959596 0.8530303 ]
|
|
|
|
mean value: 0.9029040404040404
|
|
|
|
key: train_roc_auc
|
|
value: [0.93424318 0.93920596 0.93548387 0.93548387 0.93176179 0.93548387
|
|
0.93550537 0.93922438 0.9195176 0.93191841]
|
|
|
|
mean value: 0.9337828292754833
|
|
|
|
key: test_jcc
|
|
value: [0.74545455 0.83333333 0.83333333 0.86538462 0.9 0.8627451
|
|
0.8627451 0.84313725 0.84313725 0.76363636]
|
|
|
|
mean value: 0.8352906897024545
|
|
|
|
key: train_jcc
|
|
value: [0.88116592 0.88988764 0.88392857 0.88392857 0.87804878 0.88418708
|
|
0.88418708 0.89038031 0.85745614 0.87885463]
|
|
|
|
mean value: 0.881202472698823
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.27768993 0.2098248 0.26158571 0.3298912 0.32260323 0.31526113
|
|
0.40353942 0.28225064 0.32685328 0.26789188]
|
|
|
|
mean value: 0.29973912239074707
|
|
|
|
key: score_time
|
|
value: [0.01227522 0.01871467 0.0190227 0.01896334 0.01897502 0.01907015
|
|
0.02246594 0.01926565 0.01242399 0.0192759 ]
|
|
|
|
mean value: 0.018045258522033692
|
|
|
|
key: test_mcc
|
|
value: [0.71554175 0.80985829 0.83553169 0.85485041 0.87447463 0.84970583
|
|
0.85354573 0.82801395 0.87294449 0.71626222]
|
|
|
|
mean value: 0.8210728990103431
|
|
|
|
key: train_mcc
|
|
value: [0.86910355 0.87541607 0.8748657 0.8748657 0.8748657 0.87756812
|
|
0.8750022 0.88226653 0.86141241 0.88088382]
|
|
|
|
mean value: 0.87462498037913
|
|
|
|
key: test_accuracy
|
|
value: [0.85555556 0.9 0.91111111 0.92222222 0.93333333 0.92222222
|
|
0.92134831 0.91011236 0.93258427 0.85393258]
|
|
|
|
mean value: 0.9062421972534332
|
|
|
|
key: train_accuracy
|
|
value: [0.93300248 0.93672457 0.93548387 0.93548387 0.93548387 0.93672457
|
|
0.93556382 0.93928129 0.92812887 0.93804213]
|
|
|
|
mean value: 0.93539193348523
|
|
|
|
key: test_fscore
|
|
value: [0.86315789 0.90721649 0.91836735 0.92783505 0.9375 0.92631579
|
|
0.92631579 0.91489362 0.9375 0.86597938]
|
|
|
|
mean value: 0.9125081365479314
|
|
|
|
key: train_fscore
|
|
value: [0.93571429 0.93877551 0.93838863 0.93838863 0.93838863 0.93964497
|
|
0.93853428 0.94201183 0.93176471 0.94103774]
|
|
|
|
mean value: 0.9382649198120566
|
|
|
|
key: test_precision
|
|
value: [0.82 0.84615385 0.8490566 0.86538462 0.88235294 0.88
|
|
0.8627451 0.86 0.88235294 0.80769231]
|
|
|
|
mean value: 0.8555738353396511
|
|
|
|
key: train_precision
|
|
value: [0.8993135 0.90930233 0.89795918 0.89795918 0.89795918 0.89819005
|
|
0.89819005 0.90249433 0.88590604 0.89662921]
|
|
|
|
mean value: 0.8983903053061068
|
|
|
|
key: test_recall
|
|
value: [0.91111111 0.97777778 1. 1. 1. 0.97777778
|
|
1. 0.97727273 1. 0.93333333]
|
|
|
|
mean value: 0.9777272727272728
|
|
|
|
key: train_recall
|
|
value: [0.9751861 0.97022333 0.98263027 0.98263027 0.98263027 0.98511166
|
|
0.98267327 0.98514851 0.98263027 0.99007444]
|
|
|
|
mean value: 0.9818938407488391
|
|
|
|
key: test_roc_auc
|
|
value: [0.85555556 0.9 0.91111111 0.92222222 0.93333333 0.92222222
|
|
0.92222222 0.91085859 0.93181818 0.8530303 ]
|
|
|
|
mean value: 0.9062373737373738
|
|
|
|
key: train_roc_auc
|
|
value: [0.93300248 0.93672457 0.93548387 0.93548387 0.93548387 0.93672457
|
|
0.93550537 0.93922438 0.92819632 0.93810653]
|
|
|
|
mean value: 0.9353935827825958
|
|
|
|
key: test_jcc
|
|
value: [0.75925926 0.83018868 0.8490566 0.86538462 0.88235294 0.8627451
|
|
0.8627451 0.84313725 0.88235294 0.76363636]
|
|
|
|
mean value: 0.840085885463244
|
|
|
|
key: train_jcc
|
|
value: [0.87919463 0.88461538 0.88392857 0.88392857 0.88392857 0.88616071
|
|
0.88418708 0.89038031 0.8722467 0.88864143]
|
|
|
|
mean value: 0.8837211961088743
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02103758 0.02294707 0.0247767 0.02448654 0.02558327 0.02047729
|
|
0.02412271 0.02387452 0.02471924 0.02308488]
|
|
|
|
mean value: 0.0235109806060791
|
|
|
|
key: score_time
|
|
value: [0.01163149 0.01159573 0.0114634 0.01148629 0.0115242 0.01160526
|
|
0.01145101 0.01155806 0.01148725 0.01145792]
|
|
|
|
mean value: 0.011526060104370118
|
|
|
|
key: test_mcc
|
|
value: [0.55 0.79056942 0.31622777 0.35 0.5 0.25819889
|
|
1. 0. 0. 0.57735027]
|
|
|
|
mean value: 0.43423463399957196
|
|
|
|
key: train_mcc
|
|
value: [0.86690413 0.86690413 0.89331437 0.94804294 0.89473684 0.86872191
|
|
0.86872191 0.94736842 0.87114007 0.86872191]
|
|
|
|
mean value: 0.8894576637013004
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.88888889 0.66666667 0.66666667 0.75 0.625
|
|
1. 0.5 0.5 0.75 ]
|
|
|
|
mean value: 0.7125
|
|
|
|
key: train_accuracy
|
|
value: [0.93333333 0.93333333 0.94666667 0.97333333 0.94736842 0.93421053
|
|
0.93421053 0.97368421 0.93421053 0.93421053]
|
|
|
|
mean value: 0.9444561403508772
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.85714286 0.72727273 0.66666667 0.75 0.57142857
|
|
1. 0.6 0.5 0.66666667]
|
|
|
|
mean value: 0.708917748917749
|
|
|
|
key: train_fscore
|
|
value: [0.93506494 0.93506494 0.94594595 0.97368421 0.94736842 0.93506494
|
|
0.93333333 0.97368421 0.93670886 0.93506494]
|
|
|
|
mean value: 0.9450984722403777
|
|
|
|
key: test_precision
|
|
value: [0.75 1. 0.66666667 0.75 0.75 0.66666667
|
|
1. 0.5 0.5 1. ]
|
|
|
|
mean value: 0.7583333333333333
|
|
|
|
key: train_precision
|
|
value: [0.92307692 0.92307692 0.94594595 0.94871795 0.94736842 0.92307692
|
|
0.94594595 0.97368421 0.90243902 0.92307692]
|
|
|
|
mean value: 0.9356409188886724
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.8 0.6 0.75 0.5 1. 0.75 0.5 0.5 ]
|
|
|
|
mean value: 0.69
|
|
|
|
key: train_recall
|
|
value: [0.94736842 0.94736842 0.94594595 1. 0.94736842 0.94736842
|
|
0.92105263 0.97368421 0.97368421 0.94736842]
|
|
|
|
mean value: 0.9551209103840682
|
|
|
|
key: test_roc_auc
|
|
value: [0.775 0.875 0.65 0.675 0.75 0.625 1. 0.5 0.5 0.75 ]
|
|
|
|
mean value: 0.71
|
|
|
|
key: train_roc_auc
|
|
value: [0.93314367 0.93314367 0.94665718 0.97368421 0.94736842 0.93421053
|
|
0.93421053 0.97368421 0.93421053 0.93421053]
|
|
|
|
mean value: 0.944452347083926
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.75 0.57142857 0.5 0.6 0.4
|
|
1. 0.42857143 0.33333333 0.5 ]
|
|
|
|
mean value: 0.5683333333333334
|
|
|
|
key: train_jcc
|
|
value: [0.87804878 0.87804878 0.8974359 0.94871795 0.9 0.87804878
|
|
0.875 0.94871795 0.88095238 0.87804878]
|
|
|
|
mean value: 0.8963019297775395
|
|
|
|
MCC on Blind test: -0.25
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.65937138 0.55321765 0.59056425 0.52551985 0.76420236 0.56976175
|
|
0.57840157 0.73641253 0.55280399 0.56884384]
|
|
|
|
mean value: 0.6099099159240723
|
|
|
|
key: score_time
|
|
value: [0.01185989 0.01992059 0.01180172 0.01270127 0.01272321 0.01193523
|
|
0.01201153 0.0148077 0.01197076 0.01273251]
|
|
|
|
mean value: 0.013246440887451172
|
|
|
|
key: test_mcc
|
|
value: [ 0.55 1. 0.31622777 0.35 0.77459667 0.
|
|
1. -0.25819889 -0.25819889 0.57735027]
|
|
|
|
mean value: 0.4051776924953625
|
|
|
|
key: train_mcc
|
|
value: [0.89331437 0.89331437 0.89331437 1. 1. 1.
|
|
0.92137172 1. 0.94736842 0.92137172]
|
|
|
|
mean value: 0.9470054964772662
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 1. 0.66666667 0.66666667 0.875 0.5
|
|
1. 0.375 0.375 0.75 ]
|
|
|
|
mean value: 0.6986111111111111
|
|
|
|
key: train_accuracy
|
|
value: [0.94666667 0.94666667 0.94666667 1. 1. 1.
|
|
0.96052632 1. 0.97368421 0.96052632]
|
|
|
|
mean value: 0.9734736842105263
|
|
|
|
key: test_fscore
|
|
value: [0.75 1. 0.72727273 0.66666667 0.88888889 0.33333333
|
|
1. 0.44444444 0.44444444 0.66666667]
|
|
|
|
mean value: 0.6921717171717171
|
|
|
|
key: train_fscore
|
|
value: [0.94736842 0.94736842 0.94594595 1. 1. 1.
|
|
0.96103896 1. 0.97368421 0.96 ]
|
|
|
|
mean value: 0.9735405959616485
|
|
|
|
key: test_precision
|
|
value: [0.75 1. 0.66666667 0.75 0.8 0.5
|
|
1. 0.4 0.4 1. ]
|
|
|
|
mean value: 0.7266666666666667
|
|
|
|
key: train_precision
|
|
value: [0.94736842 0.94736842 0.94594595 1. 1. 1.
|
|
0.94871795 1. 0.97368421 0.97297297]
|
|
|
|
mean value: 0.9736057920268446
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 0.8 0.6 1. 0.25 1. 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.69
|
|
|
|
key: train_recall
|
|
value: [0.94736842 0.94736842 0.94594595 1. 1. 1.
|
|
0.97368421 1. 0.97368421 0.94736842]
|
|
|
|
mean value: 0.9735419630156472
|
|
|
|
key: test_roc_auc
|
|
value: [0.775 1. 0.65 0.675 0.875 0.5 1. 0.375 0.375 0.75 ]
|
|
|
|
mean value: 0.6975
|
|
|
|
key: train_roc_auc
|
|
value: [0.94665718 0.94665718 0.94665718 1. 1. 1.
|
|
0.96052632 1. 0.97368421 0.96052632]
|
|
|
|
mean value: 0.973470839260313
|
|
|
|
key: test_jcc
|
|
value: [0.6 1. 0.57142857 0.5 0.8 0.2
|
|
1. 0.28571429 0.28571429 0.5 ]
|
|
|
|
mean value: 0.5742857142857143
|
|
|
|
key: train_jcc
|
|
value: [0.9 0.9 0.8974359 1. 1. 1.
|
|
0.925 1. 0.94871795 0.92307692]
|
|
|
|
mean value: 0.9494230769230769
|
|
|
|
MCC on Blind test: -0.25
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0122509 0.01144028 0.00966835 0.00948572 0.00925255 0.00932407
|
|
0.00848269 0.00860906 0.00928736 0.00866604]
|
|
|
|
mean value: 0.00964670181274414
|
|
|
|
key: score_time
|
|
value: [0.01159668 0.00967145 0.00964308 0.00929403 0.00936723 0.00896072
|
|
0.0088923 0.00872135 0.00876451 0.00852036]
|
|
|
|
mean value: 0.009343171119689941
|
|
|
|
key: test_mcc
|
|
value: [-0.1 0.5976143 0.1 0.63245553 0.25819889 0.
|
|
0. 0. 0.25819889 0.57735027]
|
|
|
|
mean value: 0.23238178853848207
|
|
|
|
key: train_mcc
|
|
value: [0.6002845 0.68947215 0.62613307 0.62967232 0.69989647 0.68803296
|
|
0.55708601 0.55747847 0.65465367 0.66934944]
|
|
|
|
mean value: 0.6372059063203102
|
|
|
|
key: test_accuracy
|
|
value: [0.44444444 0.77777778 0.55555556 0.77777778 0.625 0.5
|
|
0.5 0.5 0.625 0.75 ]
|
|
|
|
mean value: 0.6055555555555555
|
|
|
|
key: train_accuracy
|
|
value: [0.8 0.84 0.8 0.81333333 0.84210526 0.84210526
|
|
0.73684211 0.77631579 0.81578947 0.82894737]
|
|
|
|
mean value: 0.8095438596491228
|
|
|
|
key: test_fscore
|
|
value: [0.44444444 0.66666667 0.6 0.75 0.66666667 0.5
|
|
0.5 0.6 0.57142857 0.66666667]
|
|
|
|
mean value: 0.5965873015873016
|
|
|
|
key: train_fscore
|
|
value: [0.8 0.82857143 0.76190476 0.8 0.82352941 0.83333333
|
|
0.64285714 0.76056338 0.78787879 0.8115942 ]
|
|
|
|
mean value: 0.7850232449490402
|
|
|
|
key: test_precision
|
|
value: [0.4 1. 0.6 1. 0.6 0.5
|
|
0.5 0.5 0.66666667 1. ]
|
|
|
|
mean value: 0.6766666666666666
|
|
|
|
key: train_precision
|
|
value: [0.81081081 0.90625 0.92307692 0.84848485 0.93333333 0.88235294
|
|
1. 0.81818182 0.92857143 0.90322581]
|
|
|
|
mean value: 0.8954287910087246
|
|
|
|
key: test_recall
|
|
value: [0.5 0.5 0.6 0.6 0.75 0.5 0.5 0.75 0.5 0.5 ]
|
|
|
|
mean value: 0.57
|
|
|
|
key: train_recall
|
|
value: [0.78947368 0.76315789 0.64864865 0.75675676 0.73684211 0.78947368
|
|
0.47368421 0.71052632 0.68421053 0.73684211]
|
|
|
|
mean value: 0.7089615931721195
|
|
|
|
key: test_roc_auc
|
|
value: [0.45 0.75 0.55 0.8 0.625 0.5 0.5 0.5 0.625 0.75 ]
|
|
|
|
mean value: 0.605
|
|
|
|
key: train_roc_auc
|
|
value: [0.80014225 0.84103841 0.79800853 0.8125889 0.84210526 0.84210526
|
|
0.73684211 0.77631579 0.81578947 0.82894737]
|
|
|
|
mean value: 0.8093883357041252
|
|
|
|
key: test_jcc
|
|
value: [0.28571429 0.5 0.42857143 0.6 0.5 0.33333333
|
|
0.33333333 0.42857143 0.4 0.5 ]
|
|
|
|
mean value: 0.4309523809523809
|
|
|
|
key: train_jcc
|
|
value: [0.66666667 0.70731707 0.61538462 0.66666667 0.7 0.71428571
|
|
0.47368421 0.61363636 0.65 0.68292683]
|
|
|
|
mean value: 0.6490568139605367
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00870633 0.00863886 0.00855923 0.00862169 0.00936079 0.00853038
|
|
0.00854731 0.00861502 0.00922179 0.00853729]
|
|
|
|
mean value: 0.008733868598937988
|
|
|
|
key: score_time
|
|
value: [0.00897717 0.00906825 0.00857735 0.00857782 0.00875425 0.00851941
|
|
0.00871444 0.00857997 0.0085578 0.00858068]
|
|
|
|
mean value: 0.008690714836120605
|
|
|
|
key: test_mcc
|
|
value: [ 0.1 0.63245553 0.31622777 0.1 0.25819889 -0.37796447
|
|
1. 0. 0.57735027 0. ]
|
|
|
|
mean value: 0.26062679839780734
|
|
|
|
key: train_mcc
|
|
value: [0.60970498 0.65855734 0.64236775 0.60670651 0.6599546 0.63960215
|
|
0.63960215 0.63510735 0.69290233 0.62298833]
|
|
|
|
mean value: 0.6407493474075491
|
|
|
|
key: test_accuracy
|
|
value: [0.55555556 0.77777778 0.66666667 0.55555556 0.625 0.375
|
|
1. 0.5 0.75 0.5 ]
|
|
|
|
mean value: 0.6305555555555555
|
|
|
|
key: train_accuracy
|
|
value: [0.8 0.82666667 0.81333333 0.78666667 0.82894737 0.81578947
|
|
0.81578947 0.81578947 0.84210526 0.80263158]
|
|
|
|
mean value: 0.8147719298245614
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.8 0.72727273 0.6 0.66666667 0.54545455
|
|
1. 0.5 0.8 0.6 ]
|
|
|
|
mean value: 0.673939393939394
|
|
|
|
key: train_fscore
|
|
value: [0.81927711 0.83950617 0.82926829 0.81395349 0.83544304 0.82926829
|
|
0.82926829 0.825 0.85365854 0.82352941]
|
|
|
|
mean value: 0.829817263401887
|
|
|
|
key: test_precision
|
|
value: [0.5 0.66666667 0.66666667 0.6 0.6 0.42857143
|
|
1. 0.5 0.66666667 0.5 ]
|
|
|
|
mean value: 0.6128571428571429
|
|
|
|
key: train_precision
|
|
value: [0.75555556 0.79069767 0.75555556 0.71428571 0.80487805 0.77272727
|
|
0.77272727 0.78571429 0.79545455 0.74468085]
|
|
|
|
mean value: 0.7692276776283125
|
|
|
|
key: test_recall
|
|
value: [0.5 1. 0.8 0.6 0.75 0.75 1. 0.5 1. 0.75]
|
|
|
|
mean value: 0.765
|
|
|
|
key: train_recall
|
|
value: [0.89473684 0.89473684 0.91891892 0.94594595 0.86842105 0.89473684
|
|
0.89473684 0.86842105 0.92105263 0.92105263]
|
|
|
|
mean value: 0.902275960170697
|
|
|
|
key: test_roc_auc
|
|
value: [0.55 0.8 0.65 0.55 0.625 0.375 1. 0.5 0.75 0.5 ]
|
|
|
|
mean value: 0.63
|
|
|
|
key: train_roc_auc
|
|
value: [0.79871977 0.8257468 0.81472262 0.78876245 0.82894737 0.81578947
|
|
0.81578947 0.81578947 0.84210526 0.80263158]
|
|
|
|
mean value: 0.814900426742532
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.66666667 0.57142857 0.42857143 0.5 0.375
|
|
1. 0.33333333 0.66666667 0.42857143]
|
|
|
|
mean value: 0.5303571428571429
|
|
|
|
key: train_jcc
|
|
value: [0.69387755 0.72340426 0.70833333 0.68627451 0.7173913 0.70833333
|
|
0.70833333 0.70212766 0.74468085 0.7 ]
|
|
|
|
mean value: 0.7092756131129603
|
|
|
|
MCC on Blind test: -0.28
|
|
|
|
Accuracy on Blind test: 0.49
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00960326 0.00895739 0.00811648 0.00800729 0.00796413 0.00803542
|
|
0.00806952 0.00834846 0.00801969 0.0081718 ]
|
|
|
|
mean value: 0.008329343795776368
|
|
|
|
key: score_time
|
|
value: [0.0148859 0.01019406 0.00901318 0.00901461 0.00892067 0.00902057
|
|
0.00901604 0.0090034 0.01032948 0.00906658]
|
|
|
|
mean value: 0.00984644889831543
|
|
|
|
key: test_mcc
|
|
value: [ 0.31622777 0.55 0.1 0.1 0.57735027 0.25819889
|
|
1. -0.25819889 0. -0.25819889]
|
|
|
|
mean value: 0.23853791454593026
|
|
|
|
key: train_mcc
|
|
value: [0.52357624 0.63072008 0.60143409 0.60000015 0.58630197 0.61580149
|
|
0.55747847 0.55747847 0.6599546 0.60715823]
|
|
|
|
mean value: 0.593990377664705
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.77777778 0.55555556 0.55555556 0.75 0.625
|
|
1. 0.375 0.5 0.375 ]
|
|
|
|
mean value: 0.6180555555555556
|
|
|
|
key: train_accuracy
|
|
value: [0.76 0.81333333 0.8 0.8 0.78947368 0.80263158
|
|
0.77631579 0.77631579 0.82894737 0.80263158]
|
|
|
|
mean value: 0.7949649122807018
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.75 0.6 0.6 0.8 0.66666667
|
|
1. 0.28571429 0.33333333 0.28571429]
|
|
|
|
mean value: 0.5892857142857143
|
|
|
|
key: train_fscore
|
|
value: [0.75 0.80555556 0.78873239 0.79452055 0.77142857 0.7826087
|
|
0.76056338 0.76056338 0.82191781 0.79452055]
|
|
|
|
mean value: 0.7830410881675467
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.75 0.6 0.6 0.66666667 0.6
|
|
1. 0.33333333 0.5 0.33333333]
|
|
|
|
mean value: 0.605
|
|
|
|
key: train_precision
|
|
value: [0.79411765 0.85294118 0.82352941 0.80555556 0.84375 0.87096774
|
|
0.81818182 0.81818182 0.85714286 0.82857143]
|
|
|
|
mean value: 0.831293945486308
|
|
|
|
key: test_recall
|
|
value: [0.5 0.75 0.6 0.6 1. 0.75 1. 0.25 0.25 0.25]
|
|
|
|
mean value: 0.595
|
|
|
|
key: train_recall
|
|
value: [0.71052632 0.76315789 0.75675676 0.78378378 0.71052632 0.71052632
|
|
0.71052632 0.71052632 0.78947368 0.76315789]
|
|
|
|
mean value: 0.740896159317212
|
|
|
|
key: test_roc_auc
|
|
value: [0.65 0.775 0.55 0.55 0.75 0.625 1. 0.375 0.5 0.375]
|
|
|
|
mean value: 0.615
|
|
|
|
key: train_roc_auc
|
|
value: [0.76066856 0.81401138 0.79943101 0.79978663 0.78947368 0.80263158
|
|
0.77631579 0.77631579 0.82894737 0.80263158]
|
|
|
|
mean value: 0.7950213371266003
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.6 0.42857143 0.42857143 0.66666667 0.5
|
|
1. 0.16666667 0.2 0.16666667]
|
|
|
|
mean value: 0.4557142857142857
|
|
|
|
key: train_jcc
|
|
value: [0.6 0.6744186 0.65116279 0.65909091 0.62790698 0.64285714
|
|
0.61363636 0.61363636 0.69767442 0.65909091]
|
|
|
|
mean value: 0.6439474479009363
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00900865 0.0088141 0.00891018 0.00897598 0.00888276 0.00893426
|
|
0.00940371 0.00891089 0.00908875 0.00898051]
|
|
|
|
mean value: 0.008990979194641114
|
|
|
|
key: score_time
|
|
value: [0.00871253 0.00863552 0.00881696 0.0085485 0.00857282 0.00857806
|
|
0.00863981 0.00858784 0.00856829 0.0085876 ]
|
|
|
|
mean value: 0.008624792098999023
|
|
|
|
key: test_mcc
|
|
value: [-0.35 0.79056942 0.31622777 0.35 0.5 -0.37796447
|
|
1. 0. -0.25819889 0.57735027]
|
|
|
|
mean value: 0.254798408749217
|
|
|
|
key: train_mcc
|
|
value: [0.73786392 0.73786392 0.76031294 0.8161102 0.76985122 0.76342228
|
|
0.73786479 0.79388419 0.79056942 0.76342228]
|
|
|
|
mean value: 0.7671165159319231
|
|
|
|
key: test_accuracy
|
|
value: [0.33333333 0.88888889 0.66666667 0.66666667 0.75 0.375
|
|
1. 0.5 0.375 0.75 ]
|
|
|
|
mean value: 0.6305555555555555
|
|
|
|
key: train_accuracy
|
|
value: [0.86666667 0.86666667 0.88 0.90666667 0.88157895 0.88157895
|
|
0.86842105 0.89473684 0.89473684 0.88157895]
|
|
|
|
mean value: 0.8822631578947369
|
|
|
|
key: test_fscore
|
|
value: [0.25 0.85714286 0.72727273 0.66666667 0.75 0.54545455
|
|
1. 0.5 0.28571429 0.66666667]
|
|
|
|
mean value: 0.6248917748917748
|
|
|
|
key: train_fscore
|
|
value: [0.86111111 0.86111111 0.88 0.90909091 0.87323944 0.88311688
|
|
0.86486486 0.88888889 0.89189189 0.88311688]
|
|
|
|
mean value: 0.8796431979812261
|
|
|
|
key: test_precision
|
|
value: [0.25 1. 0.66666667 0.75 0.75 0.42857143
|
|
1. 0.5 0.33333333 1. ]
|
|
|
|
mean value: 0.6678571428571428
|
|
|
|
key: train_precision
|
|
value: [0.91176471 0.91176471 0.86842105 0.875 0.93939394 0.87179487
|
|
0.88888889 0.94117647 0.91666667 0.87179487]
|
|
|
|
mean value: 0.8996666173523759
|
|
|
|
key: test_recall
|
|
value: [0.25 0.75 0.8 0.6 0.75 0.75 1. 0.5 0.25 0.5 ]
|
|
|
|
mean value: 0.615
|
|
|
|
key: train_recall
|
|
value: [0.81578947 0.81578947 0.89189189 0.94594595 0.81578947 0.89473684
|
|
0.84210526 0.84210526 0.86842105 0.89473684]
|
|
|
|
mean value: 0.8627311522048364
|
|
|
|
key: test_roc_auc
|
|
value: [0.325 0.875 0.65 0.675 0.75 0.375 1. 0.5 0.375 0.75 ]
|
|
|
|
mean value: 0.6275000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [0.8673542 0.8673542 0.88015647 0.9071835 0.88157895 0.88157895
|
|
0.86842105 0.89473684 0.89473684 0.88157895]
|
|
|
|
mean value: 0.8824679943100996
|
|
|
|
key: test_jcc
|
|
value: [0.14285714 0.75 0.57142857 0.5 0.6 0.375
|
|
1. 0.33333333 0.16666667 0.5 ]
|
|
|
|
mean value: 0.49392857142857144
|
|
|
|
key: train_jcc
|
|
value: [0.75609756 0.75609756 0.78571429 0.83333333 0.775 0.79069767
|
|
0.76190476 0.8 0.80487805 0.79069767]
|
|
|
|
mean value: 0.7854420900521297
|
|
|
|
MCC on Blind test: -0.08
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.3566103 0.38200951 0.43109488 0.38567448 0.43245578 0.38491535
|
|
0.39917016 0.36103129 0.50263786 0.37672877]
|
|
|
|
mean value: 0.40123283863067627
|
|
|
|
key: score_time
|
|
value: [0.01195097 0.01184797 0.01224089 0.01200986 0.01193523 0.01200175
|
|
0.01197886 0.01195788 0.01205945 0.0119791 ]
|
|
|
|
mean value: 0.011996197700500488
|
|
|
|
key: test_mcc
|
|
value: [ 0.35 1. 0.31622777 0.15811388 0.77459667 0.
|
|
0.77459667 -0.25819889 0. 0.25819889]
|
|
|
|
mean value: 0.3373534987508224
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 1. 0.66666667 0.55555556 0.875 0.5
|
|
0.875 0.375 0.5 0.625 ]
|
|
|
|
mean value: 0.6638888888888889
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 1. 0.72727273 0.5 0.88888889 0.33333333
|
|
0.85714286 0.44444444 0.5 0.57142857]
|
|
|
|
mean value: 0.6489177489177489
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.6 1. 0.66666667 0.66666667 0.8 0.5
|
|
1. 0.4 0.5 0.66666667]
|
|
|
|
mean value: 0.6799999999999999
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 0.8 0.4 1. 0.25 0.75 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.645
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.675 1. 0.65 0.575 0.875 0.5 0.875 0.375 0.5 0.625]
|
|
|
|
mean value: 0.665
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 1. 0.57142857 0.33333333 0.8 0.2
|
|
0.75 0.28571429 0.33333333 0.4 ]
|
|
|
|
mean value: 0.5173809523809524
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.14
|
|
|
|
Accuracy on Blind test: 0.49
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01321459 0.01290941 0.01006556 0.00965619 0.00955391 0.00958467
|
|
0.00958467 0.00956392 0.00945449 0.0097909 ]
|
|
|
|
mean value: 0.01033782958984375
|
|
|
|
key: score_time
|
|
value: [0.0114274 0.0094862 0.00863719 0.00826406 0.00828934 0.00823569
|
|
0.00829005 0.00831294 0.00828815 0.00835872]
|
|
|
|
mean value: 0.008758974075317384
|
|
|
|
key: test_mcc
|
|
value: [0.63245553 0.31622777 0.8 1. 1. 0.5
|
|
1. 0.77459667 0.25819889 0.77459667]
|
|
|
|
mean value: 0.7056075526280642
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.66666667 0.88888889 1. 1. 0.75
|
|
1. 0.875 0.625 0.875 ]
|
|
|
|
mean value: 0.8458333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.57142857 0.88888889 1. 1. 0.75
|
|
1. 0.88888889 0.66666667 0.85714286]
|
|
|
|
mean value: 0.8423015873015873
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 1. 1. 1. 0.75
|
|
1. 0.8 0.6 1. ]
|
|
|
|
mean value: 0.8483333333333334
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.5 0.8 1. 1. 0.75 1. 1. 0.75 0.75]
|
|
|
|
mean value: 0.855
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.65 0.9 1. 1. 0.75 1. 0.875 0.625 0.875]
|
|
|
|
mean value: 0.8475
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.4 0.8 1. 1. 0.6
|
|
1. 0.8 0.5 0.75 ]
|
|
|
|
mean value: 0.7516666666666667
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08042169 0.08020353 0.07984281 0.08248663 0.08033347 0.08022118
|
|
0.08093882 0.07976699 0.07971478 0.08031654]
|
|
|
|
mean value: 0.08042464256286622
|
|
|
|
key: score_time
|
|
value: [0.01688957 0.01686358 0.01676273 0.01679468 0.0168581 0.01673651
|
|
0.01667166 0.01647496 0.01671505 0.01674414]
|
|
|
|
mean value: 0.0167510986328125
|
|
|
|
key: test_mcc
|
|
value: [0.31622777 0.31622777 0.35 0.35 0.77459667 0.25819889
|
|
1. 0. 0. 0.57735027]
|
|
|
|
mean value: 0.39426013602119464
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.66666667 0.66666667 0.66666667 0.875 0.625
|
|
1. 0.5 0.5 0.75 ]
|
|
|
|
mean value: 0.6916666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.57142857 0.66666667 0.66666667 0.88888889 0.66666667
|
|
1. 0.5 0.5 0.66666667]
|
|
|
|
mean value: 0.6698412698412698
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.75 0.75 0.8 0.6
|
|
1. 0.5 0.5 1. ]
|
|
|
|
mean value: 0.7233333333333334
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.5 0.5 0.6 0.6 1. 0.75 1. 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.645
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.65 0.65 0.675 0.675 0.875 0.625 1. 0.5 0.5 0.75 ]
|
|
|
|
mean value: 0.6900000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.4 0.5 0.5 0.8 0.5
|
|
1. 0.33333333 0.33333333 0.5 ]
|
|
|
|
mean value: 0.5266666666666666
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.14
|
|
|
|
Accuracy on Blind test: 0.49
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00846887 0.00840354 0.00837779 0.00848317 0.00832963 0.00833416
|
|
0.00833273 0.0083673 0.00847006 0.0083406 ]
|
|
|
|
mean value: 0.00839078426361084
|
|
|
|
key: score_time
|
|
value: [0.00829053 0.00831199 0.00843668 0.00840259 0.00828886 0.00839186
|
|
0.00829005 0.00831199 0.00830126 0.00832486]
|
|
|
|
mean value: 0.008335065841674805
|
|
|
|
key: test_mcc
|
|
value: [-0.05976143 0.05976143 -0.1 0.1 -0.57735027 -0.5
|
|
0.37796447 0.25819889 0.25819889 0.5 ]
|
|
|
|
mean value: 0.03170119833139236
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.44444444 0.55555556 0.44444444 0.55555556 0.25 0.25
|
|
0.625 0.625 0.625 0.75 ]
|
|
|
|
mean value: 0.5125
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.54545455 0.33333333 0.44444444 0.6 0.4 0.25
|
|
0.72727273 0.66666667 0.66666667 0.75 ]
|
|
|
|
mean value: 0.5383838383838384
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.42857143 0.5 0.5 0.6 0.33333333 0.25
|
|
0.57142857 0.6 0.6 0.75 ]
|
|
|
|
mean value: 0.5133333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.25 0.4 0.6 0.5 0.25 1. 0.75 0.75 0.75]
|
|
|
|
mean value: 0.6
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.475 0.525 0.45 0.55 0.25 0.25 0.625 0.625 0.625 0.75 ]
|
|
|
|
mean value: 0.5125
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.375 0.2 0.28571429 0.42857143 0.25 0.14285714
|
|
0.57142857 0.5 0.5 0.6 ]
|
|
|
|
mean value: 0.38535714285714284
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.01
|
|
|
|
Accuracy on Blind test: 0.49
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.99724722 0.99640512 0.99026775 0.99570179 0.99687743 0.9930985
|
|
0.99392676 0.99498487 0.99451756 0.99996042]
|
|
|
|
mean value: 0.9952987432479858
|
|
|
|
key: score_time
|
|
value: [0.08700514 0.08634806 0.08625293 0.08669138 0.08667207 0.08684969
|
|
0.08677268 0.08643746 0.08640552 0.09360075]
|
|
|
|
mean value: 0.08730356693267823
|
|
|
|
key: test_mcc
|
|
value: [0.55 0.55 0.35 0.63245553 0.77459667 0.25819889
|
|
1. 0. 0. 0.77459667]
|
|
|
|
mean value: 0.4889847760263804
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.77777778 0.66666667 0.77777778 0.875 0.625
|
|
1. 0.5 0.5 0.875 ]
|
|
|
|
mean value: 0.7375
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.75 0.66666667 0.75 0.88888889 0.66666667
|
|
1. 0.5 0.5 0.85714286]
|
|
|
|
mean value: 0.7329365079365079
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.75 0.75 1. 0.8 0.6 1. 0.5 0.5 1. ]
|
|
|
|
mean value: 0.765
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.6 0.6 1. 0.75 1. 0.5 0.5 0.75]
|
|
|
|
mean value: 0.72
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.775 0.775 0.675 0.8 0.875 0.625 1. 0.5 0.5 0.875]
|
|
|
|
mean value: 0.74
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.6 0.5 0.6 0.8 0.5
|
|
1. 0.33333333 0.33333333 0.75 ]
|
|
|
|
mean value: 0.6016666666666667
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.92377687 0.84681273 0.88949633 0.8409636 0.90135837 0.90286493
|
|
0.88374043 0.92271328 0.81917858 0.86645675]
|
|
|
|
mean value: 0.8797361850738525
|
|
|
|
key: score_time
|
|
value: [0.22073531 0.22147965 0.21408248 0.21105886 0.17403078 0.21011996
|
|
0.22706914 0.24071097 0.22439933 0.23749518]
|
|
|
|
mean value: 0.21811816692352295
|
|
|
|
key: test_mcc
|
|
value: [0.1 0.8 0.35 0.63245553 0.77459667 0.5
|
|
1. 0.25819889 0. 0.77459667]
|
|
|
|
mean value: 0.5189847760263804
|
|
|
|
key: train_mcc
|
|
value: [0.86699858 0.92034139 0.89451381 0.89331437 0.89473684 0.89473684
|
|
0.92137172 0.89597867 0.94736842 0.92137172]
|
|
|
|
mean value: 0.9050732362390278
|
|
|
|
key: test_accuracy
|
|
value: [0.55555556 0.88888889 0.66666667 0.77777778 0.875 0.75
|
|
1. 0.625 0.5 0.875 ]
|
|
|
|
mean value: 0.7513888888888889
|
|
|
|
key: train_accuracy
|
|
value: [0.93333333 0.96 0.94666667 0.94666667 0.94736842 0.94736842
|
|
0.96052632 0.94736842 0.97368421 0.96052632]
|
|
|
|
mean value: 0.9523508771929824
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.88888889 0.66666667 0.75 0.88888889 0.75
|
|
1. 0.57142857 0.5 0.85714286]
|
|
|
|
mean value: 0.7373015873015873
|
|
|
|
key: train_fscore
|
|
value: [0.93333333 0.96 0.94444444 0.94594595 0.94736842 0.94736842
|
|
0.96103896 0.94594595 0.97368421 0.96 ]
|
|
|
|
mean value: 0.951912968334021
|
|
|
|
key: test_precision
|
|
value: [0.5 0.8 0.75 1. 0.8 0.75
|
|
1. 0.66666667 0.5 1. ]
|
|
|
|
mean value: 0.7766666666666666
|
|
|
|
key: train_precision
|
|
value: [0.94594595 0.97297297 0.97142857 0.94594595 0.94736842 0.94736842
|
|
0.94871795 0.97222222 0.97368421 0.97297297]
|
|
|
|
mean value: 0.9598627632838159
|
|
|
|
key: test_recall
|
|
value: [0.5 1. 0.6 0.6 1. 0.75 1. 0.5 0.5 0.75]
|
|
|
|
mean value: 0.72
|
|
|
|
key: train_recall
|
|
value: [0.92105263 0.94736842 0.91891892 0.94594595 0.94736842 0.94736842
|
|
0.97368421 0.92105263 0.97368421 0.94736842]
|
|
|
|
mean value: 0.9443812233285918
|
|
|
|
key: test_roc_auc
|
|
value: [0.55 0.9 0.675 0.8 0.875 0.75 1. 0.625 0.5 0.875]
|
|
|
|
mean value: 0.755
|
|
|
|
key: train_roc_auc
|
|
value: [0.93349929 0.9601707 0.94630156 0.94665718 0.94736842 0.94736842
|
|
0.96052632 0.94736842 0.97368421 0.96052632]
|
|
|
|
mean value: 0.9523470839260313
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.8 0.5 0.6 0.8 0.6
|
|
1. 0.4 0.33333333 0.75 ]
|
|
|
|
mean value: 0.6116666666666667
|
|
|
|
key: train_jcc
|
|
value: [0.875 0.92307692 0.89473684 0.8974359 0.9 0.9
|
|
0.925 0.8974359 0.94871795 0.92307692]
|
|
|
|
mean value: 0.9084480431848854
|
|
|
|
MCC on Blind test: 0.09
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02315235 0.00852799 0.00853801 0.00882125 0.00863647 0.00860405
|
|
0.00869322 0.00861049 0.00903416 0.00846434]
|
|
|
|
mean value: 0.010108232498168945
|
|
|
|
key: score_time
|
|
value: [0.01027966 0.00849533 0.00891209 0.00854635 0.00892663 0.00851274
|
|
0.00859284 0.00843501 0.00869679 0.00852942]
|
|
|
|
mean value: 0.008792686462402343
|
|
|
|
key: test_mcc
|
|
value: [ 0.1 0.63245553 0.31622777 0.1 0.25819889 -0.37796447
|
|
1. 0. 0.57735027 0. ]
|
|
|
|
mean value: 0.26062679839780734
|
|
|
|
key: train_mcc
|
|
value: [0.60970498 0.65855734 0.64236775 0.60670651 0.6599546 0.63960215
|
|
0.63960215 0.63510735 0.69290233 0.62298833]
|
|
|
|
mean value: 0.6407493474075491
|
|
|
|
key: test_accuracy
|
|
value: [0.55555556 0.77777778 0.66666667 0.55555556 0.625 0.375
|
|
1. 0.5 0.75 0.5 ]
|
|
|
|
mean value: 0.6305555555555555
|
|
|
|
key: train_accuracy
|
|
value: [0.8 0.82666667 0.81333333 0.78666667 0.82894737 0.81578947
|
|
0.81578947 0.81578947 0.84210526 0.80263158]
|
|
|
|
mean value: 0.8147719298245614
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.8 0.72727273 0.6 0.66666667 0.54545455
|
|
1. 0.5 0.8 0.6 ]
|
|
|
|
mean value: 0.673939393939394
|
|
|
|
key: train_fscore
|
|
value: [0.81927711 0.83950617 0.82926829 0.81395349 0.83544304 0.82926829
|
|
0.82926829 0.825 0.85365854 0.82352941]
|
|
|
|
mean value: 0.829817263401887
|
|
|
|
key: test_precision
|
|
value: [0.5 0.66666667 0.66666667 0.6 0.6 0.42857143
|
|
1. 0.5 0.66666667 0.5 ]
|
|
|
|
mean value: 0.6128571428571429
|
|
|
|
key: train_precision
|
|
value: [0.75555556 0.79069767 0.75555556 0.71428571 0.80487805 0.77272727
|
|
0.77272727 0.78571429 0.79545455 0.74468085]
|
|
|
|
mean value: 0.7692276776283125
|
|
|
|
key: test_recall
|
|
value: [0.5 1. 0.8 0.6 0.75 0.75 1. 0.5 1. 0.75]
|
|
|
|
mean value: 0.765
|
|
|
|
key: train_recall
|
|
value: [0.89473684 0.89473684 0.91891892 0.94594595 0.86842105 0.89473684
|
|
0.89473684 0.86842105 0.92105263 0.92105263]
|
|
|
|
mean value: 0.902275960170697
|
|
|
|
key: test_roc_auc
|
|
value: [0.55 0.8 0.65 0.55 0.625 0.375 1. 0.5 0.75 0.5 ]
|
|
|
|
mean value: 0.63
|
|
|
|
key: train_roc_auc
|
|
value: [0.79871977 0.8257468 0.81472262 0.78876245 0.82894737 0.81578947
|
|
0.81578947 0.81578947 0.84210526 0.80263158]
|
|
|
|
mean value: 0.814900426742532
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.66666667 0.57142857 0.42857143 0.5 0.375
|
|
1. 0.33333333 0.66666667 0.42857143]
|
|
|
|
mean value: 0.5303571428571429
|
|
|
|
key: train_jcc
|
|
value: [0.69387755 0.72340426 0.70833333 0.68627451 0.7173913 0.70833333
|
|
0.70833333 0.70212766 0.74468085 0.7 ]
|
|
|
|
mean value: 0.7092756131129603
|
|
|
|
MCC on Blind test: -0.28
|
|
|
|
Accuracy on Blind test: 0.49
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.05683494 0.03863883 0.04392576 0.13575006 0.07225108 0.03703737
|
|
0.03737426 0.03567529 0.04046488 0.04343057]
|
|
|
|
mean value: 0.05413830280303955
|
|
|
|
key: score_time
|
|
value: [0.01127696 0.01120019 0.01132512 0.01322961 0.01155353 0.01126075
|
|
0.01119399 0.01106644 0.01122379 0.01133251]
|
|
|
|
mean value: 0.011466288566589355
|
|
|
|
key: test_mcc
|
|
value: [0.79056942 0.31622777 0.8 0.63245553 1. 1.
|
|
0.77459667 0.57735027 0.57735027 0.57735027]
|
|
|
|
mean value: 0.7045900189902969
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 0.66666667 0.88888889 0.77777778 1. 1.
|
|
0.875 0.75 0.75 0.75 ]
|
|
|
|
mean value: 0.8347222222222223
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.57142857 0.88888889 0.75 1. 1.
|
|
0.88888889 0.8 0.8 0.66666667]
|
|
|
|
mean value: 0.8223015873015873
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 1. 1. 1. 1.
|
|
0.8 0.66666667 0.66666667 1. ]
|
|
|
|
mean value: 0.88
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.8 0.6 1. 1. 1. 1. 1. 0.5 ]
|
|
|
|
mean value: 0.815
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.65 0.9 0.8 1. 1. 0.875 0.75 0.75 0.75 ]
|
|
|
|
mean value: 0.835
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.4 0.8 0.6 1. 1.
|
|
0.8 0.66666667 0.66666667 0.5 ]
|
|
|
|
mean value: 0.7183333333333334
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.58
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02317381 0.04550219 0.04047465 0.03977585 0.0489924 0.04040909
|
|
0.04024577 0.0257802 0.0174911 0.03982115]
|
|
|
|
mean value: 0.0361666202545166
|
|
|
|
key: score_time
|
|
value: [0.01177001 0.02361727 0.02034259 0.02165008 0.02103066 0.02211738
|
|
0.01951337 0.01165795 0.01800728 0.01705456]
|
|
|
|
mean value: 0.018676114082336426
|
|
|
|
key: test_mcc
|
|
value: [0.63245553 0.35 0.31622777 0.35 0.77459667 0.25819889
|
|
0.5 0.25819889 0.57735027 0. ]
|
|
|
|
mean value: 0.4017028015975945
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.66666667 0.66666667 0.66666667 0.875 0.625
|
|
0.75 0.625 0.75 0.5 ]
|
|
|
|
mean value: 0.6902777777777778
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.66666667 0.72727273 0.66666667 0.85714286 0.57142857
|
|
0.75 0.66666667 0.8 0.33333333]
|
|
|
|
mean value: 0.6839177489177489
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.6 0.66666667 0.75 1. 0.66666667
|
|
0.75 0.6 0.66666667 0.5 ]
|
|
|
|
mean value: 0.6866666666666666
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.75 0.8 0.6 0.75 0.5 0.75 0.75 1. 0.25]
|
|
|
|
mean value: 0.715
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.675 0.65 0.675 0.875 0.625 0.75 0.625 0.75 0.5 ]
|
|
|
|
mean value: 0.6925
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.5 0.57142857 0.5 0.75 0.4
|
|
0.6 0.5 0.66666667 0.2 ]
|
|
|
|
mean value: 0.5354761904761904
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.14
|
|
|
|
Accuracy on Blind test: 0.49
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01161981 0.00956011 0.00865626 0.00829458 0.00826788 0.00819016
|
|
0.00825834 0.00820041 0.0082891 0.00827694]
|
|
|
|
mean value: 0.008761358261108399
|
|
|
|
key: score_time
|
|
value: [0.01129651 0.00864458 0.00855899 0.00829434 0.00819063 0.00822902
|
|
0.00825071 0.00828648 0.00828671 0.00831985]
|
|
|
|
mean value: 0.008635783195495605
|
|
|
|
key: test_mcc
|
|
value: [-0.35 -0.1 -0.31622777 0.15811388 0.25819889 0.
|
|
0.77459667 -0.25819889 0.25819889 0.77459667]
|
|
|
|
mean value: 0.1199278345221709
|
|
|
|
key: train_mcc
|
|
value: [0.3117865 0.38895144 0.31574008 0.32052793 0.39597276 0.40160966
|
|
0.36893239 0.29038002 0.42105263 0.29795094]
|
|
|
|
mean value: 0.3512904359509771
|
|
|
|
key: test_accuracy
|
|
value: [0.33333333 0.44444444 0.44444444 0.55555556 0.625 0.5
|
|
0.875 0.375 0.625 0.875 ]
|
|
|
|
mean value: 0.5652777777777778
|
|
|
|
key: train_accuracy
|
|
value: [0.65333333 0.69333333 0.65333333 0.65333333 0.69736842 0.69736842
|
|
0.68421053 0.64473684 0.71052632 0.64473684]
|
|
|
|
mean value: 0.6732280701754386
|
|
|
|
key: test_fscore
|
|
value: [0.25 0.44444444 0.61538462 0.5 0.66666667 0.6
|
|
0.88888889 0.44444444 0.57142857 0.85714286]
|
|
|
|
mean value: 0.5838400488400488
|
|
|
|
key: train_fscore
|
|
value: [0.69047619 0.71604938 0.68292683 0.69047619 0.70886076 0.72289157
|
|
0.69230769 0.65822785 0.71052632 0.68235294]
|
|
|
|
mean value: 0.6955095716070356
|
|
|
|
key: test_precision
|
|
value: [0.25 0.4 0.5 0.66666667 0.6 0.5
|
|
0.8 0.4 0.66666667 1. ]
|
|
|
|
mean value: 0.5783333333333334
|
|
|
|
key: train_precision
|
|
value: [0.63043478 0.6744186 0.62222222 0.61702128 0.68292683 0.66666667
|
|
0.675 0.63414634 0.71052632 0.61702128]
|
|
|
|
mean value: 0.6530384315861417
|
|
|
|
key: test_recall
|
|
value: [0.25 0.5 0.8 0.4 0.75 0.75 1. 0.5 0.5 0.75]
|
|
|
|
mean value: 0.62
|
|
|
|
key: train_recall
|
|
value: [0.76315789 0.76315789 0.75675676 0.78378378 0.73684211 0.78947368
|
|
0.71052632 0.68421053 0.71052632 0.76315789]
|
|
|
|
mean value: 0.7461593172119488
|
|
|
|
key: test_roc_auc
|
|
value: [0.325 0.45 0.4 0.575 0.625 0.5 0.875 0.375 0.625 0.875]
|
|
|
|
mean value: 0.5625
|
|
|
|
key: train_roc_auc
|
|
value: [0.65184922 0.69238976 0.65469417 0.65504979 0.69736842 0.69736842
|
|
0.68421053 0.64473684 0.71052632 0.64473684]
|
|
|
|
mean value: 0.6732930298719773
|
|
|
|
key: test_jcc
|
|
value: [0.14285714 0.28571429 0.44444444 0.33333333 0.5 0.42857143
|
|
0.8 0.28571429 0.4 0.75 ]
|
|
|
|
mean value: 0.43706349206349204
|
|
|
|
key: train_jcc
|
|
value: [0.52727273 0.55769231 0.51851852 0.52727273 0.54901961 0.56603774
|
|
0.52941176 0.49056604 0.55102041 0.51785714]
|
|
|
|
mean value: 0.5334668977910614
|
|
|
|
MCC on Blind test: -0.08
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00928378 0.01257086 0.01270509 0.01370788 0.01286483 0.01280713
|
|
0.0128541 0.01311278 0.01278234 0.01241827]
|
|
|
|
mean value: 0.01251070499420166
|
|
|
|
key: score_time
|
|
value: [0.00830221 0.0113523 0.01127315 0.01129389 0.01130509 0.0112164
|
|
0.01124454 0.01122737 0.01126814 0.01124501]
|
|
|
|
mean value: 0.010972809791564942
|
|
|
|
key: test_mcc
|
|
value: [ 0.55 0.79056942 0.31622777 0.35 0.25819889 0.25819889
|
|
0.57735027 0. -0.37796447 0.57735027]
|
|
|
|
mean value: 0.32999310259232795
|
|
|
|
key: train_mcc
|
|
value: [0.89451381 0.82825406 0.92028493 1. 0.77644535 0.92393644
|
|
0.34299717 0.9486833 0.55708601 0.84210526]
|
|
|
|
mean value: 0.8034306326794696
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.88888889 0.66666667 0.66666667 0.625 0.625
|
|
0.75 0.5 0.375 0.75 ]
|
|
|
|
mean value: 0.6625
|
|
|
|
key: train_accuracy
|
|
value: [0.94666667 0.90666667 0.96 1. 0.88157895 0.96052632
|
|
0.60526316 0.97368421 0.73684211 0.92105263]
|
|
|
|
mean value: 0.8892280701754386
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.85714286 0.72727273 0.66666667 0.57142857 0.57142857
|
|
0.8 0.6 0.54545455 0.66666667]
|
|
|
|
mean value: 0.6756060606060605
|
|
|
|
key: train_fscore
|
|
value: [0.94871795 0.89855072 0.95890411 1. 0.86956522 0.96202532
|
|
0.71698113 0.97435897 0.79166667 0.92105263]
|
|
|
|
mean value: 0.9041822721471732
|
|
|
|
key: test_precision
|
|
value: [0.75 1. 0.66666667 0.75 0.66666667 0.66666667
|
|
0.66666667 0.5 0.42857143 1. ]
|
|
|
|
mean value: 0.7095238095238096
|
|
|
|
key: train_precision
|
|
value: [0.925 1. 0.97222222 1. 0.96774194 0.92682927
|
|
0.55882353 0.95 0.65517241 0.92105263]
|
|
|
|
mean value: 0.8876842000782592
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.8 0.6 0.5 0.5 1. 0.75 0.75 0.5 ]
|
|
|
|
mean value: 0.69
|
|
|
|
key: train_recall
|
|
value: [0.97368421 0.81578947 0.94594595 1. 0.78947368 1.
|
|
1. 1. 1. 0.92105263]
|
|
|
|
mean value: 0.9445945945945946
|
|
|
|
key: test_roc_auc
|
|
value: [0.775 0.875 0.65 0.675 0.625 0.625 0.75 0.5 0.375 0.75 ]
|
|
|
|
mean value: 0.66
|
|
|
|
key: train_roc_auc
|
|
value: [0.94630156 0.90789474 0.95981508 1. 0.88157895 0.96052632
|
|
0.60526316 0.97368421 0.73684211 0.92105263]
|
|
|
|
mean value: 0.8892958748221906
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.75 0.57142857 0.5 0.4 0.4
|
|
0.66666667 0.42857143 0.375 0.5 ]
|
|
|
|
mean value: 0.5191666666666667
|
|
|
|
key: train_jcc
|
|
value: [0.90243902 0.81578947 0.92105263 1. 0.76923077 0.92682927
|
|
0.55882353 0.95 0.65517241 0.85365854]
|
|
|
|
mean value: 0.8352995646967087
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.27
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01247644 0.01255226 0.01255894 0.01262641 0.01262808 0.01284647
|
|
0.01281023 0.01259208 0.01189542 0.01205182]
|
|
|
|
mean value: 0.012503814697265626
|
|
|
|
key: score_time
|
|
value: [0.01069069 0.01129389 0.0114367 0.01160622 0.01173592 0.0112946
|
|
0.01147819 0.01138663 0.01150131 0.01150846]
|
|
|
|
mean value: 0.011393260955810548
|
|
|
|
key: test_mcc
|
|
value: [ 0.47809144 0.31622777 0.35 0.1 0.25819889 0.37796447
|
|
1. 0. -0.25819889 0.57735027]
|
|
|
|
mean value: 0.3199633951949448
|
|
|
|
key: train_mcc
|
|
value: [0.94797081 0.4855009 0.76006756 0.74006579 0.8468098 0.73133412
|
|
0.89597867 0.92137172 0.8468098 0.8183437 ]
|
|
|
|
mean value: 0.7994252863353628
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.55555556 0.66666667 0.55555556 0.625 0.625
|
|
1. 0.5 0.375 0.75 ]
|
|
|
|
mean value: 0.6319444444444444
|
|
|
|
key: train_accuracy
|
|
value: [0.97333333 0.69333333 0.86666667 0.85333333 0.92105263 0.85526316
|
|
0.94736842 0.96052632 0.92105263 0.90789474]
|
|
|
|
mean value: 0.8899824561403509
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.66666667 0.66666667 0.6 0.57142857 0.4
|
|
1. 0.6 0.44444444 0.66666667]
|
|
|
|
mean value: 0.6343145743145743
|
|
|
|
key: train_fscore
|
|
value: [0.97435897 0.76767677 0.84375 0.87058824 0.91666667 0.8358209
|
|
0.94594595 0.96103896 0.925 0.90410959]
|
|
|
|
mean value: 0.8944956035544918
|
|
|
|
key: test_precision
|
|
value: [0.57142857 0.5 0.75 0.6 0.66666667 1.
|
|
1. 0.5 0.4 1. ]
|
|
|
|
mean value: 0.6988095238095238
|
|
|
|
key: train_precision
|
|
value: [0.95 0.62295082 1. 0.77083333 0.97058824 0.96551724
|
|
0.97222222 0.94871795 0.88095238 0.94285714]
|
|
|
|
mean value: 0.9024639324428587
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.6 0.6 0.5 0.25 1. 0.75 0.5 0.5 ]
|
|
|
|
mean value: 0.67
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.72972973 1. 0.86842105 0.73684211
|
|
0.92105263 0.97368421 0.97368421 0.86842105]
|
|
|
|
mean value: 0.9071834992887624
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.6 0.675 0.55 0.625 0.625 1. 0.5 0.375 0.75 ]
|
|
|
|
mean value: 0.64
|
|
|
|
key: train_roc_auc
|
|
value: [0.97297297 0.68918919 0.86486486 0.85526316 0.92105263 0.85526316
|
|
0.94736842 0.96052632 0.92105263 0.90789474]
|
|
|
|
mean value: 0.8895448079658607
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.5 0.5 0.42857143 0.4 0.25
|
|
1. 0.42857143 0.28571429 0.5 ]
|
|
|
|
mean value: 0.48642857142857143
|
|
|
|
key: train_jcc
|
|
value: [0.95 0.62295082 0.72972973 0.77083333 0.84615385 0.71794872
|
|
0.8974359 0.925 0.86046512 0.825 ]
|
|
|
|
mean value: 0.8145517460552726
|
|
|
|
MCC on Blind test: -0.07
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08501792 0.07641745 0.07635164 0.07650113 0.07603097 0.07646155
|
|
0.076792 0.0771904 0.07685828 0.07624316]
|
|
|
|
mean value: 0.07738645076751709
|
|
|
|
key: score_time
|
|
value: [0.01444697 0.01437187 0.01435518 0.01437235 0.01430559 0.01472449
|
|
0.01446629 0.01442027 0.01439619 0.01433682]
|
|
|
|
mean value: 0.01441960334777832
|
|
|
|
key: test_mcc
|
|
value: [1. 0.31622777 0.8 0.8 0.77459667 1.
|
|
1. 0.77459667 0.25819889 0.5 ]
|
|
|
|
mean value: 0.7223619994246966
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.66666667 0.88888889 0.88888889 0.875 1.
|
|
1. 0.875 0.625 0.75 ]
|
|
|
|
mean value: 0.8569444444444444
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.57142857 0.88888889 0.88888889 0.85714286 1.
|
|
1. 0.88888889 0.66666667 0.75 ]
|
|
|
|
mean value: 0.8511904761904762
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 1. 1. 1. 1.
|
|
1. 0.8 0.6 0.75 ]
|
|
|
|
mean value: 0.8816666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.5 0.8 0.8 0.75 1. 1. 1. 0.75 0.75]
|
|
|
|
mean value: 0.835
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.65 0.9 0.9 0.875 1. 1. 0.875 0.625 0.75 ]
|
|
|
|
mean value: 0.8575
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.4 0.8 0.8 0.75 1. 1. 0.8 0.5 0.6 ]
|
|
|
|
mean value: 0.765
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03155708 0.03448486 0.02696562 0.05002117 0.03130126 0.03591418
|
|
0.03443122 0.03155398 0.04987168 0.03369951]
|
|
|
|
mean value: 0.035980057716369626
|
|
|
|
key: score_time
|
|
value: [0.01772404 0.02515697 0.02325606 0.03016686 0.02305079 0.03444767
|
|
0.02957582 0.02519584 0.02852535 0.0280509 ]
|
|
|
|
mean value: 0.02651503086090088
|
|
|
|
key: test_mcc
|
|
value: [0.79056942 0.55 0.8 1. 1. 1.
|
|
0.77459667 0.77459667 0.57735027 0.77459667]
|
|
|
|
mean value: 0.8041709691956171
|
|
|
|
key: train_mcc
|
|
value: [0.94665718 0.94665718 0.94804294 0.97368421 1. 0.9486833
|
|
0.9486833 0.9486833 0.94736842 0.97402153]
|
|
|
|
mean value: 0.9582481371496262
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 0.77777778 0.88888889 1. 1. 1.
|
|
0.875 0.875 0.75 0.875 ]
|
|
|
|
mean value: 0.8930555555555555
|
|
|
|
key: train_accuracy
|
|
value: [0.97333333 0.97333333 0.97333333 0.98666667 1. 0.97368421
|
|
0.97368421 0.97368421 0.97368421 0.98684211]
|
|
|
|
mean value: 0.9788245614035088
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.75 0.88888889 1. 1. 1.
|
|
0.88888889 0.88888889 0.8 0.85714286]
|
|
|
|
mean value: 0.8930952380952382
|
|
|
|
key: train_fscore
|
|
value: [0.97368421 0.97368421 0.97368421 0.98666667 1. 0.97435897
|
|
0.97435897 0.97435897 0.97368421 0.98701299]
|
|
|
|
mean value: 0.979149341886184
|
|
|
|
key: test_precision
|
|
value: [1. 0.75 1. 1. 1. 1.
|
|
0.8 0.8 0.66666667 1. ]
|
|
|
|
mean value: 0.9016666666666666
|
|
|
|
key: train_precision
|
|
value: [0.97368421 0.97368421 0.94871795 0.97368421 1. 0.95
|
|
0.95 0.95 0.97368421 0.97435897]
|
|
|
|
mean value: 0.9667813765182186
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.8 1. 1. 1. 1. 1. 1. 0.75]
|
|
|
|
mean value: 0.905
|
|
|
|
key: train_recall
|
|
value: [0.97368421 0.97368421 1. 1. 1. 1.
|
|
1. 1. 0.97368421 1. ]
|
|
|
|
mean value: 0.9921052631578947
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.775 0.9 1. 1. 1. 0.875 0.875 0.75 0.875]
|
|
|
|
mean value: 0.8925000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [0.97332859 0.97332859 0.97368421 0.98684211 1. 0.97368421
|
|
0.97368421 0.97368421 0.97368421 0.98684211]
|
|
|
|
mean value: 0.9788762446657184
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.6 0.8 1. 1. 1.
|
|
0.8 0.8 0.66666667 0.75 ]
|
|
|
|
mean value: 0.8166666666666667
|
|
|
|
key: train_jcc
|
|
value: [0.94871795 0.94871795 0.94871795 0.97368421 1. 0.95
|
|
0.95 0.95 0.94871795 0.97435897]
|
|
|
|
mean value: 0.9592914979757085
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01289177 0.0145824 0.01474333 0.01518726 0.01512861 0.01514196
|
|
0.01518893 0.015167 0.01515603 0.01526713]
|
|
|
|
mean value: 0.014845442771911622
|
|
|
|
key: score_time
|
|
value: [0.0110023 0.01099491 0.01152611 0.01153374 0.01145935 0.01148629
|
|
0.01153278 0.01152658 0.01151633 0.01182103]
|
|
|
|
mean value: 0.011439943313598632
|
|
|
|
key: test_mcc
|
|
value: [ 0.31622777 0.5976143 0.15811388 0.47809144 0.77459667 0.
|
|
0.77459667 0.25819889 -0.57735027 0.57735027]
|
|
|
|
mean value: 0.3357439625656339
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.77777778 0.55555556 0.66666667 0.875 0.5
|
|
0.875 0.625 0.25 0.75 ]
|
|
|
|
mean value: 0.6541666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.66666667 0.5 0.57142857 0.88888889 0.6
|
|
0.88888889 0.57142857 0.4 0.66666667]
|
|
|
|
mean value: 0.6325396825396825
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 1. 0.66666667 1. 0.8 0.5
|
|
0.8 0.66666667 0.33333333 1. ]
|
|
|
|
mean value: 0.7433333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.5 0.5 0.4 0.4 1. 0.75 1. 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.605
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.65 0.75 0.575 0.7 0.875 0.5 0.875 0.625 0.25 0.75 ]
|
|
|
|
mean value: 0.655
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.5 0.33333333 0.4 0.8 0.42857143
|
|
0.8 0.4 0.25 0.5 ]
|
|
|
|
mean value: 0.4811904761904762
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.06
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.18700218 0.1452148 0.17105246 0.17293596 0.17147398 0.17163014
|
|
0.17711973 0.17467976 0.16508913 0.17534304]
|
|
|
|
mean value: 0.17115411758422852
|
|
|
|
key: score_time
|
|
value: [0.00877285 0.00868893 0.00878572 0.00880146 0.00884652 0.00872469
|
|
0.0088017 0.00879931 0.00900173 0.00868034]
|
|
|
|
mean value: 0.008790326118469239
|
|
|
|
key: test_mcc
|
|
value: [1. 0.31622777 0.8 0.8 1. 0.5
|
|
1. 0.25819889 0.57735027 0.57735027]
|
|
|
|
mean value: 0.6829127194143251
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.66666667 0.88888889 0.88888889 1. 0.75
|
|
1. 0.625 0.75 0.75 ]
|
|
|
|
mean value: 0.8319444444444444
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.57142857 0.88888889 0.88888889 1. 0.75
|
|
1. 0.66666667 0.8 0.66666667]
|
|
|
|
mean value: 0.8232539682539682
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 1. 1. 1. 0.75
|
|
1. 0.6 0.66666667 1. ]
|
|
|
|
mean value: 0.8683333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.5 0.8 0.8 1. 0.75 1. 0.75 1. 0.5 ]
|
|
|
|
mean value: 0.81
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.65 0.9 0.9 1. 0.75 1. 0.625 0.75 0.75 ]
|
|
|
|
mean value: 0.8325
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.4 0.8 0.8 1. 0.6
|
|
1. 0.5 0.66666667 0.5 ]
|
|
|
|
mean value: 0.7266666666666667
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.58
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.00964832 0.01959229 0.01344728 0.01325369 0.01343584 0.0133481
|
|
0.01355219 0.01349425 0.01350141 0.01550746]
|
|
|
|
mean value: 0.013878083229064942
|
|
|
|
key: score_time
|
|
value: [0.01134968 0.01187444 0.01154637 0.01147485 0.0115273 0.01237082
|
|
0.01241136 0.0115962 0.01238823 0.01243496]
|
|
|
|
mean value: 0.011897420883178711
|
|
|
|
key: test_mcc
|
|
value: [ 0.79056942 0.55 -0.05976143 -0.31622777 0. 0.25819889
|
|
0.77459667 0. -0.57735027 -0.57735027]
|
|
|
|
mean value: 0.084267523916793
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 0.77777778 0.44444444 0.33333333 0.5 0.625
|
|
0.875 0.5 0.25 0.25 ]
|
|
|
|
mean value: 0.5444444444444444
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.75 0.28571429 0.25 0.5 0.57142857
|
|
0.85714286 0.33333333 0. 0.4 ]
|
|
|
|
mean value: 0.4804761904761905
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.75 0.5 0.33333333 0.5 0.66666667
|
|
1. 0.5 0. 0.33333333]
|
|
|
|
mean value: 0.5583333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.2 0.2 0.5 0.5 0.75 0.25 0. 0.5 ]
|
|
|
|
mean value: 0.44
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.775 0.475 0.35 0.5 0.625 0.875 0.5 0.25 0.25 ]
|
|
|
|
mean value: 0.5475
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.6 0.16666667 0.14285714 0.33333333 0.4
|
|
0.75 0.2 0. 0.25 ]
|
|
|
|
mean value: 0.35928571428571426
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.01
|
|
|
|
Accuracy on Blind test: 0.49
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03981113 0.03229427 0.03270364 0.04690862 0.03154755 0.0125289
|
|
0.04503226 0.02168822 0.03168106 0.03145266]
|
|
|
|
mean value: 0.032564830780029294
|
|
|
|
key: score_time
|
|
value: [0.03217363 0.01144433 0.02096105 0.02072716 0.0227952 0.0114665
|
|
0.01161695 0.02121472 0.01988864 0.02282548]
|
|
|
|
mean value: 0.01951136589050293
|
|
|
|
key: test_mcc
|
|
value: [ 0.55 1. 0.31622777 0.35 0.5 0.
|
|
1. 0.25819889 -0.5 0.57735027]
|
|
|
|
mean value: 0.4051776924953625
|
|
|
|
key: train_mcc
|
|
value: [0.94804294 0.92034139 0.92028493 1. 0.92137172 0.9486833
|
|
0.97402153 0.94736842 0.94736842 0.9486833 ]
|
|
|
|
mean value: 0.9476165962968767
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 1. 0.66666667 0.66666667 0.75 0.5
|
|
1. 0.625 0.25 0.75 ]
|
|
|
|
mean value: 0.6986111111111111
|
|
|
|
key: train_accuracy
|
|
value: [0.97333333 0.96 0.96 1. 0.96052632 0.97368421
|
|
0.98684211 0.97368421 0.97368421 0.97368421]
|
|
|
|
mean value: 0.9735438596491228
|
|
|
|
key: test_fscore
|
|
value: [0.75 1. 0.72727273 0.66666667 0.75 0.5
|
|
1. 0.66666667 0.25 0.66666667]
|
|
|
|
mean value: 0.6977272727272726
|
|
|
|
key: train_fscore
|
|
value: [0.97297297 0.96 0.95890411 1. 0.96 0.97297297
|
|
0.98666667 0.97368421 0.97368421 0.97297297]
|
|
|
|
mean value: 0.9731858116227258
|
|
|
|
key: test_precision
|
|
value: [0.75 1. 0.66666667 0.75 0.75 0.5
|
|
1. 0.6 0.25 1. ]
|
|
|
|
mean value: 0.7266666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 0.97297297 0.97222222 1. 0.97297297 1.
|
|
1. 0.97368421 0.97368421 1. ]
|
|
|
|
mean value: 0.98655365892208
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 0.8 0.6 0.75 0.5 1. 0.75 0.25 0.5 ]
|
|
|
|
mean value: 0.69
|
|
|
|
key: train_recall
|
|
value: [0.94736842 0.94736842 0.94594595 1. 0.94736842 0.94736842
|
|
0.97368421 0.97368421 0.97368421 0.94736842]
|
|
|
|
mean value: 0.9603840682788051
|
|
|
|
key: test_roc_auc
|
|
value: [0.775 1. 0.65 0.675 0.75 0.5 1. 0.625 0.25 0.75 ]
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_cd_sl.py:176: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_cd_sl.py:179: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
|
|
mean value: 0.6975
|
|
|
|
key: train_roc_auc
|
|
value: [0.97368421 0.9601707 0.95981508 1. 0.96052632 0.97368421
|
|
0.98684211 0.97368421 0.97368421 0.97368421]
|
|
|
|
mean value: 0.9735775248933144
|
|
|
|
key: test_jcc
|
|
value: [0.6 1. 0.57142857 0.5 0.6 0.33333333
|
|
1. 0.5 0.14285714 0.5 ]
|
|
|
|
mean value: 0.5747619047619048
|
|
|
|
key: train_jcc
|
|
value: [0.94736842 0.92307692 0.92105263 1. 0.92307692 0.94736842
|
|
0.97368421 0.94871795 0.94871795 0.94736842]
|
|
|
|
mean value: 0.9480431848852902
|
|
|
|
MCC on Blind test: -0.25
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.14958286 0.25165272 0.18754363 0.17332387 0.17231584 0.16945744
|
|
0.17469931 0.19163108 0.17544818 0.17475653]
|
|
|
|
mean value: 0.18204114437103272
|
|
|
|
key: score_time
|
|
value: [0.02126646 0.02359891 0.02093983 0.02184081 0.0206728 0.01835012
|
|
0.01161075 0.01900244 0.02072024 0.02028847]
|
|
|
|
mean value: 0.01982908248901367
|
|
|
|
key: test_mcc
|
|
value: [ 0.55 0.79056942 0.31622777 0.35 0.5 0.
|
|
1. 0.25819889 -0.5 0.57735027]
|
|
|
|
mean value: 0.384234633999572
|
|
|
|
key: train_mcc
|
|
value: [0.94804294 0.76031294 0.92028493 1. 0.92137172 0.9486833
|
|
0.97402153 1. 0.94736842 0.9486833 ]
|
|
|
|
mean value: 0.9368769092414002
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.88888889 0.66666667 0.66666667 0.75 0.5
|
|
1. 0.625 0.25 0.75 ]
|
|
|
|
mean value: 0.6875
|
|
|
|
key: train_accuracy
|
|
value: [0.97333333 0.88 0.96 1. 0.96052632 0.97368421
|
|
0.98684211 1. 0.97368421 0.97368421]
|
|
|
|
mean value: 0.9681754385964912
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.85714286 0.72727273 0.66666667 0.75 0.5
|
|
1. 0.66666667 0.25 0.66666667]
|
|
|
|
mean value: 0.6834415584415584
|
|
|
|
key: train_fscore
|
|
value: [0.97297297 0.88 0.95890411 1. 0.96 0.97297297
|
|
0.98666667 1. 0.97368421 0.97297297]
|
|
|
|
mean value: 0.9678173905700942
|
|
|
|
key: test_precision
|
|
value: [0.75 1. 0.66666667 0.75 0.75 0.5
|
|
1. 0.6 0.25 1. ]
|
|
|
|
mean value: 0.7266666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 0.89189189 0.97222222 1. 0.97297297 1.
|
|
1. 1. 0.97368421 1. ]
|
|
|
|
mean value: 0.9810771297613403
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.8 0.6 0.75 0.5 1. 0.75 0.25 0.5 ]
|
|
|
|
mean value: 0.665
|
|
|
|
key: train_recall
|
|
value: [0.94736842 0.86842105 0.94594595 1. 0.94736842 0.94736842
|
|
0.97368421 1. 0.97368421 0.94736842]
|
|
|
|
mean value: 0.9551209103840683
|
|
|
|
key: test_roc_auc
|
|
value: [0.775 0.875 0.65 0.675 0.75 0.5 1. 0.625 0.25 0.75 ]
|
|
|
|
mean value: 0.685
|
|
|
|
key: train_roc_auc
|
|
value: [0.97368421 0.88015647 0.95981508 1. 0.96052632 0.97368421
|
|
0.98684211 1. 0.97368421 0.97368421]
|
|
|
|
mean value: 0.9682076813655761
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.75 0.57142857 0.5 0.6 0.33333333
|
|
1. 0.5 0.14285714 0.5 ]
|
|
|
|
mean value: 0.5497619047619048
|
|
|
|
key: train_jcc
|
|
value: [0.94736842 0.78571429 0.92105263 1. 0.92307692 0.94736842
|
|
0.97368421 1. 0.94871795 0.94736842]
|
|
|
|
mean value: 0.9394351262772316
|
|
|
|
MCC on Blind test: -0.25
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03980541 0.06430697 0.06160903 0.04064298 0.04063821 0.0413835
|
|
0.04094172 0.04044366 0.0412035 0.04032683]
|
|
|
|
mean value: 0.04513018131256104
|
|
|
|
key: score_time
|
|
value: [0.01312709 0.02179003 0.02036476 0.01276827 0.01279616 0.01282287
|
|
0.01313519 0.01290965 0.01307893 0.01304913]
|
|
|
|
mean value: 0.014584207534790039
|
|
|
|
key: test_mcc
|
|
value: [0.66683134 0.87447463 0.84632727 0.80985829 0.84970583 0.76486616
|
|
0.79608094 0.78850326 0.71198003 0.82112188]
|
|
|
|
mean value: 0.7929749620468929
|
|
|
|
key: train_mcc
|
|
value: [0.84617966 0.82541109 0.82596265 0.82332959 0.84420087 0.83065047
|
|
0.86095014 0.83057888 0.85021541 0.83876712]
|
|
|
|
mean value: 0.8376245893035124
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.93333333 0.92222222 0.9 0.92222222 0.87777778
|
|
0.88764045 0.88764045 0.85393258 0.91011236]
|
|
|
|
mean value: 0.8928214731585518
|
|
|
|
key: train_accuracy
|
|
value: [0.92183623 0.91191067 0.91191067 0.91066998 0.92059553 0.91439206
|
|
0.92936803 0.91449814 0.9244114 0.91821561]
|
|
|
|
mean value: 0.9177808321110875
|
|
|
|
key: test_fscore
|
|
value: [0.83516484 0.9375 0.92473118 0.90721649 0.92631579 0.88659794
|
|
0.89795918 0.89583333 0.86315789 0.90909091]
|
|
|
|
mean value: 0.8983567561258463
|
|
|
|
key: train_fscore
|
|
value: [0.92473118 0.91456077 0.91497006 0.91366906 0.92380952 0.91716687
|
|
0.93189964 0.91716687 0.92641737 0.92105263]
|
|
|
|
mean value: 0.9205443978365201
|
|
|
|
key: test_precision
|
|
value: [0.82608696 0.88235294 0.89583333 0.84615385 0.88 0.82692308
|
|
0.81481481 0.82692308 0.82 0.93023256]
|
|
|
|
mean value: 0.8549320603985893
|
|
|
|
key: train_precision
|
|
value: [0.89170507 0.88785047 0.88425926 0.88399072 0.88787185 0.88837209
|
|
0.90069284 0.89044289 0.90140845 0.8891455 ]
|
|
|
|
mean value: 0.8905739139830674
|
|
|
|
key: test_recall
|
|
value: [0.84444444 1. 0.95555556 0.97777778 0.97777778 0.95555556
|
|
1. 0.97727273 0.91111111 0.88888889]
|
|
|
|
mean value: 0.9488383838383838
|
|
|
|
key: train_recall
|
|
value: [0.96029777 0.94292804 0.94789082 0.94540943 0.96277916 0.94789082
|
|
0.96534653 0.94554455 0.9528536 0.95533499]
|
|
|
|
mean value: 0.9526275704493526
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.93333333 0.92222222 0.9 0.92222222 0.87777778
|
|
0.88888889 0.88863636 0.85328283 0.91035354]
|
|
|
|
mean value: 0.8930050505050505
|
|
|
|
key: train_roc_auc
|
|
value: [0.92183623 0.91191067 0.91191067 0.91066998 0.92059553 0.91439206
|
|
0.92932339 0.91445962 0.9244466 0.91826155]
|
|
|
|
mean value: 0.9177806304203621
|
|
|
|
key: test_jcc
|
|
value: [0.71698113 0.88235294 0.86 0.83018868 0.8627451 0.7962963
|
|
0.81481481 0.81132075 0.75925926 0.83333333]
|
|
|
|
mean value: 0.8167292308957126
|
|
|
|
key: train_jcc
|
|
value: [0.86 0.84257206 0.84326711 0.8410596 0.85840708 0.84700665
|
|
0.87248322 0.84700665 0.86292135 0.85365854]
|
|
|
|
mean value: 0.8528382262692936
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.88078547 1.05674505 0.95202208 1.06442595 0.93282032 0.93836689
|
|
1.05564332 0.98470354 1.07442117 0.96674395]
|
|
|
|
mean value: 0.9906677722930908
|
|
|
|
key: score_time
|
|
value: [0.01298785 0.0150249 0.01564097 0.01507306 0.01628017 0.01502991
|
|
0.01530552 0.01224422 0.01508832 0.01525259]
|
|
|
|
mean value: 0.014792752265930176
|
|
|
|
key: test_mcc
|
|
value: [0.89442719 0.87447463 0.97801929 0.95650071 0.93541435 0.83553169
|
|
0.89341253 0.89341253 0.85305908 0.93465477]
|
|
|
|
mean value: 0.9048906772420442
|
|
|
|
key: train_mcc
|
|
value: [0.98034206 0.95866971 0.97065374 0.95866971 0.98034206 0.97791139
|
|
0.96828329 0.9516004 0.9706906 0.96588655]
|
|
|
|
mean value: 0.9683049514203413
|
|
|
|
key: test_accuracy
|
|
value: [0.94444444 0.93333333 0.98888889 0.97777778 0.96666667 0.91111111
|
|
0.94382022 0.94382022 0.92134831 0.96629213]
|
|
|
|
mean value: 0.9497503121098627
|
|
|
|
key: train_accuracy
|
|
value: [0.99007444 0.97890819 0.98511166 0.97890819 0.99007444 0.98883375
|
|
0.98389095 0.97521685 0.98513011 0.9826518 ]
|
|
|
|
mean value: 0.983880038496899
|
|
|
|
key: test_fscore
|
|
value: [0.94736842 0.9375 0.98901099 0.97826087 0.96774194 0.91836735
|
|
0.94623656 0.94623656 0.92783505 0.96774194]
|
|
|
|
mean value: 0.9526299667361318
|
|
|
|
key: train_fscore
|
|
value: [0.99017199 0.97934386 0.98533007 0.97934386 0.99017199 0.98895706
|
|
0.98416565 0.97584541 0.98533007 0.98292683]
|
|
|
|
mean value: 0.984158680162365
|
|
|
|
key: test_precision
|
|
value: [0.9 0.88235294 0.97826087 0.95744681 0.9375 0.8490566
|
|
0.89795918 0.89795918 0.86538462 0.9375 ]
|
|
|
|
mean value: 0.9103420205757465
|
|
|
|
key: train_precision
|
|
value: [0.98053528 0.95952381 0.97108434 0.95952381 0.98053528 0.97815534
|
|
0.96882494 0.95283019 0.97108434 0.96642686]
|
|
|
|
mean value: 0.9688524180403342
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.94444444 0.93333333 0.98888889 0.97777778 0.96666667 0.91111111
|
|
0.94444444 0.94444444 0.92045455 0.96590909]
|
|
|
|
mean value: 0.9497474747474748
|
|
|
|
key: train_roc_auc
|
|
value: [0.99007444 0.97890819 0.98511166 0.97890819 0.99007444 0.98883375
|
|
0.98387097 0.9751861 0.98514851 0.98267327]
|
|
|
|
mean value: 0.9838789524113702
|
|
|
|
key: test_jcc
|
|
value: [0.9 0.88235294 0.97826087 0.95744681 0.9375 0.8490566
|
|
0.89795918 0.89795918 0.86538462 0.9375 ]
|
|
|
|
mean value: 0.9103420205757465
|
|
|
|
key: train_jcc
|
|
value: [0.98053528 0.95952381 0.97108434 0.95952381 0.98053528 0.97815534
|
|
0.96882494 0.95283019 0.97108434 0.96642686]
|
|
|
|
mean value: 0.9688524180403342
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01646352 0.01168895 0.01131034 0.01112556 0.01102638 0.01108241
|
|
0.01112914 0.01134229 0.01118493 0.01111817]
|
|
|
|
mean value: 0.011747169494628906
|
|
|
|
key: score_time
|
|
value: [0.01463628 0.00956249 0.00938582 0.00910854 0.00907445 0.00928593
|
|
0.00953078 0.00911522 0.00926828 0.00906205]
|
|
|
|
mean value: 0.009802985191345214
|
|
|
|
key: test_mcc
|
|
value: [0.17364863 0.31025261 0.1470871 0.20683508 0.15512631 0.20100756
|
|
0.49658063 0.073149 0.36130904 0.28898097]
|
|
|
|
mean value: 0.24139769377228132
|
|
|
|
key: train_mcc
|
|
value: [0.31473321 0.24479475 0.26811672 0.25361875 0.25439568 0.26650824
|
|
0.23025488 0.26540787 0.23158044 0.25674512]
|
|
|
|
mean value: 0.25861556603489577
|
|
|
|
key: test_accuracy
|
|
value: [0.57777778 0.63333333 0.56666667 0.58888889 0.56666667 0.58888889
|
|
0.70786517 0.52808989 0.66292135 0.62921348]
|
|
|
|
mean value: 0.6050312109862671
|
|
|
|
key: train_accuracy
|
|
value: [0.6426799 0.60794045 0.617866 0.61166253 0.61166253 0.617866
|
|
0.60223048 0.61710037 0.60099133 0.6133829 ]
|
|
|
|
mean value: 0.6143382499900067
|
|
|
|
key: test_fscore
|
|
value: [0.65454545 0.7079646 0.64220183 0.67256637 0.65486726 0.66666667
|
|
0.76785714 0.625 0.72727273 0.7027027 ]
|
|
|
|
mean value: 0.6821644758995575
|
|
|
|
key: train_fscore
|
|
value: [0.70491803 0.68273092 0.69138277 0.68605817 0.68668669 0.69014085
|
|
0.67738693 0.69069069 0.67864271 0.6861167 ]
|
|
|
|
mean value: 0.6874754468429529
|
|
|
|
key: test_precision
|
|
value: [0.55384615 0.58823529 0.546875 0.55882353 0.54411765 0.56060606
|
|
0.63235294 0.51470588 0.61538462 0.59090909]
|
|
|
|
mean value: 0.5705856214863567
|
|
|
|
key: train_precision
|
|
value: [0.60034904 0.57335582 0.57983193 0.57575758 0.57550336 0.58037225
|
|
0.57021997 0.57983193 0.56761269 0.57698816]
|
|
|
|
mean value: 0.5779822715086764
|
|
|
|
key: test_recall
|
|
value: [0.8 0.88888889 0.77777778 0.84444444 0.82222222 0.82222222
|
|
0.97727273 0.79545455 0.88888889 0.86666667]
|
|
|
|
mean value: 0.8483838383838384
|
|
|
|
key: train_recall
|
|
value: [0.85359801 0.84367246 0.8560794 0.84863524 0.85111663 0.85111663
|
|
0.83415842 0.8539604 0.84367246 0.84615385]
|
|
|
|
mean value: 0.8482163476893595
|
|
|
|
key: test_roc_auc
|
|
value: [0.57777778 0.63333333 0.56666667 0.58888889 0.56666667 0.58888889
|
|
0.71085859 0.53106061 0.66035354 0.62651515]
|
|
|
|
mean value: 0.60510101010101
|
|
|
|
key: train_roc_auc
|
|
value: [0.6426799 0.60794045 0.617866 0.61166253 0.61166253 0.617866
|
|
0.60194273 0.6168065 0.60129167 0.61367098]
|
|
|
|
mean value: 0.6143389307913422
|
|
|
|
key: test_jcc
|
|
value: [0.48648649 0.54794521 0.47297297 0.50666667 0.48684211 0.5
|
|
0.62318841 0.45454545 0.57142857 0.54166667]
|
|
|
|
mean value: 0.519174253530653
|
|
|
|
key: train_jcc
|
|
value: [0.5443038 0.51829268 0.52833078 0.5221374 0.52286585 0.52688172
|
|
0.51215805 0.52752294 0.51359517 0.52220521]
|
|
|
|
mean value: 0.5238293603467037
|
|
|
|
MCC on Blind test: -0.01
|
|
|
|
Accuracy on Blind test: 0.49
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01165771 0.01598382 0.01595592 0.01608229 0.01595116 0.01601267
|
|
0.01630902 0.01623654 0.01601982 0.01615214]
|
|
|
|
mean value: 0.015636110305786134
|
|
|
|
key: score_time
|
|
value: [0.01216125 0.01231194 0.01227331 0.01232648 0.01237249 0.01237202
|
|
0.01231289 0.01234794 0.01239395 0.01236701]
|
|
|
|
mean value: 0.012323927879333497
|
|
|
|
key: test_mcc
|
|
value: [0.28284271 0.51854497 0.60540551 0.5500191 0.5280169 0.52421865
|
|
0.58600584 0.3518082 0.45063372 0.49157739]
|
|
|
|
mean value: 0.4889073004048967
|
|
|
|
key: train_mcc
|
|
value: [0.54126588 0.51549131 0.50604987 0.51844604 0.50371456 0.51263006
|
|
0.4413118 0.53137679 0.49612078 0.51185264]
|
|
|
|
mean value: 0.5078259736044609
|
|
|
|
key: test_accuracy
|
|
value: [0.63333333 0.74444444 0.8 0.76666667 0.74444444 0.75555556
|
|
0.78651685 0.66292135 0.71910112 0.73033708]
|
|
|
|
mean value: 0.7343320848938827
|
|
|
|
key: train_accuracy
|
|
value: [0.76054591 0.74813896 0.74193548 0.74937965 0.74193548 0.74565757
|
|
0.71623296 0.755886 0.74101611 0.74597274]
|
|
|
|
mean value: 0.7446700858800631
|
|
|
|
key: test_fscore
|
|
value: [0.68571429 0.78095238 0.8125 0.79207921 0.78504673 0.78
|
|
0.80412371 0.71153846 0.75247525 0.77358491]
|
|
|
|
mean value: 0.7678014929623219
|
|
|
|
key: train_fscore
|
|
value: [0.78907104 0.77814208 0.77489177 0.77947598 0.77342048 0.77741585
|
|
0.7424072 0.78516903 0.76803552 0.77644493]
|
|
|
|
mean value: 0.7744473877569741
|
|
|
|
key: test_precision
|
|
value: [0.6 0.68333333 0.76470588 0.71428571 0.67741935 0.70909091
|
|
0.73584906 0.61666667 0.67857143 0.67213115]
|
|
|
|
mean value: 0.685205349328446
|
|
|
|
key: train_precision
|
|
value: [0.70507812 0.6953125 0.68714012 0.69590643 0.68932039 0.69111969
|
|
0.68041237 0.70175439 0.69477912 0.692607 ]
|
|
|
|
mean value: 0.6933430129836738
|
|
|
|
key: test_recall
|
|
value: [0.8 0.91111111 0.86666667 0.88888889 0.93333333 0.86666667
|
|
0.88636364 0.84090909 0.84444444 0.91111111]
|
|
|
|
mean value: 0.8749494949494949
|
|
|
|
key: train_recall
|
|
value: [0.89578164 0.88337469 0.88833747 0.88585608 0.8808933 0.88833747
|
|
0.81683168 0.89108911 0.85856079 0.88337469]
|
|
|
|
mean value: 0.8772436921111466
|
|
|
|
key: test_roc_auc
|
|
value: [0.63333333 0.74444444 0.8 0.76666667 0.74444444 0.75555556
|
|
0.78762626 0.66489899 0.71767677 0.72828283]
|
|
|
|
mean value: 0.7342929292929293
|
|
|
|
key: train_roc_auc
|
|
value: [0.76054591 0.74813896 0.74193548 0.74937965 0.74193548 0.74565757
|
|
0.71610815 0.75571825 0.74116159 0.74614279]
|
|
|
|
mean value: 0.7446723828710414
|
|
|
|
key: test_jcc
|
|
value: [0.52173913 0.640625 0.68421053 0.6557377 0.64615385 0.63934426
|
|
0.67241379 0.55223881 0.6031746 0.63076923]
|
|
|
|
mean value: 0.6246406903134964
|
|
|
|
key: train_jcc
|
|
value: [0.65162455 0.63685152 0.63250883 0.63864043 0.63055062 0.63587922
|
|
0.59033989 0.64631957 0.62342342 0.63458111]
|
|
|
|
mean value: 0.6320719163089898
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.0149169 0.01213098 0.01175261 0.01165867 0.0107553 0.01230001
|
|
0.01095843 0.011729 0.01247191 0.01174283]
|
|
|
|
mean value: 0.012041664123535157
|
|
|
|
key: score_time
|
|
value: [0.04167914 0.01544523 0.01555729 0.01481891 0.01449847 0.01482058
|
|
0.01448679 0.014256 0.01969123 0.01963949]
|
|
|
|
mean value: 0.018489313125610352
|
|
|
|
key: test_mcc
|
|
value: [0.6894997 0.83553169 0.85485041 0.83553169 0.70710678 0.87447463
|
|
0.81495457 0.79608094 0.75773523 0.83347626]
|
|
|
|
mean value: 0.7999241901652472
|
|
|
|
key: train_mcc
|
|
value: [0.85852721 0.84551382 0.84335796 0.84983659 0.84120575 0.83049832
|
|
0.84350869 0.84350869 0.84572246 0.8435698 ]
|
|
|
|
mean value: 0.8445249298796176
|
|
|
|
key: test_accuracy
|
|
value: [0.82222222 0.91111111 0.92222222 0.91111111 0.83333333 0.93333333
|
|
0.8988764 0.88764045 0.86516854 0.91011236]
|
|
|
|
mean value: 0.8895131086142322
|
|
|
|
key: train_accuracy
|
|
value: [0.92431762 0.91687345 0.91563275 0.91935484 0.91439206 0.90818859
|
|
0.9157373 0.9157373 0.91697646 0.9157373 ]
|
|
|
|
mean value: 0.9162947657131613
|
|
|
|
key: test_fscore
|
|
value: [0.8490566 0.91836735 0.92783505 0.91836735 0.85714286 0.9375
|
|
0.90721649 0.89795918 0.88235294 0.91836735]
|
|
|
|
mean value: 0.9014165172974461
|
|
|
|
key: train_fscore
|
|
value: [0.92964245 0.92325315 0.9221968 0.92537313 0.92114286 0.91590909
|
|
0.92237443 0.92237443 0.92325315 0.9221968 ]
|
|
|
|
mean value: 0.9227716278833067
|
|
|
|
key: test_precision
|
|
value: [0.73770492 0.8490566 0.86538462 0.8490566 0.75 0.88235294
|
|
0.83018868 0.81481481 0.78947368 0.8490566 ]
|
|
|
|
mean value: 0.8217089464185252
|
|
|
|
key: train_precision
|
|
value: [0.86853448 0.85744681 0.85562633 0.86111111 0.85381356 0.84486373
|
|
0.8559322 0.8559322 0.85744681 0.85562633]
|
|
|
|
mean value: 0.8566333562576701
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.82222222 0.91111111 0.92222222 0.91111111 0.83333333 0.93333333
|
|
0.9 0.88888889 0.86363636 0.90909091]
|
|
|
|
mean value: 0.8894949494949494
|
|
|
|
key: train_roc_auc
|
|
value: [0.92431762 0.91687345 0.91563275 0.91935484 0.91439206 0.90818859
|
|
0.91563275 0.91563275 0.91707921 0.91584158]
|
|
|
|
mean value: 0.9162945605974989
|
|
|
|
key: test_jcc
|
|
value: [0.73770492 0.8490566 0.86538462 0.8490566 0.75 0.88235294
|
|
0.83018868 0.81481481 0.78947368 0.8490566 ]
|
|
|
|
mean value: 0.8217089464185252
|
|
|
|
key: train_jcc
|
|
value: [0.86853448 0.85744681 0.85562633 0.86111111 0.85381356 0.84486373
|
|
0.8559322 0.8559322 0.85744681 0.85562633]
|
|
|
|
mean value: 0.8566333562576701
|
|
|
|
MCC on Blind test: -0.09
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04604912 0.0464673 0.04788375 0.04734874 0.04818106 0.04369974
|
|
0.03886938 0.04239893 0.03977442 0.04288077]
|
|
|
|
mean value: 0.04435532093048096
|
|
|
|
key: score_time
|
|
value: [0.01900125 0.01972008 0.01999474 0.01988506 0.01963735 0.0195322
|
|
0.01690316 0.01687288 0.01726079 0.01727414]
|
|
|
|
mean value: 0.01860816478729248
|
|
|
|
key: test_mcc
|
|
value: [0.60059347 0.84970583 0.76486616 0.80498447 0.82962978 0.87011096
|
|
0.87330789 0.7979798 0.80256791 0.77614967]
|
|
|
|
mean value: 0.7969895942107378
|
|
|
|
key: train_mcc
|
|
value: [0.86638914 0.85196687 0.88198009 0.8593257 0.87676196 0.89133081
|
|
0.87728835 0.85471889 0.89808411 0.84462396]
|
|
|
|
mean value: 0.8702469893580047
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.92222222 0.87777778 0.9 0.91111111 0.93333333
|
|
0.93258427 0.8988764 0.8988764 0.88764045]
|
|
|
|
mean value: 0.8962421972534332
|
|
|
|
key: train_accuracy
|
|
value: [0.93300248 0.92555831 0.94044665 0.9292804 0.93672457 0.94416873
|
|
0.93680297 0.92688971 0.94795539 0.92193309]
|
|
|
|
mean value: 0.9342762306247137
|
|
|
|
key: test_fscore
|
|
value: [0.80434783 0.92631579 0.88659794 0.90526316 0.91666667 0.93617021
|
|
0.93617021 0.8988764 0.90526316 0.88636364]
|
|
|
|
mean value: 0.9002035002551044
|
|
|
|
key: train_fscore
|
|
value: [0.93398533 0.92718447 0.94188862 0.93074119 0.93935791 0.94636472
|
|
0.93964497 0.9286578 0.94964029 0.92345079]
|
|
|
|
mean value: 0.9360916081122809
|
|
|
|
key: test_precision
|
|
value: [0.78723404 0.88 0.82692308 0.86 0.8627451 0.89795918
|
|
0.88 0.88888889 0.86 0.90697674]
|
|
|
|
mean value: 0.8650727034263889
|
|
|
|
key: train_precision
|
|
value: [0.92048193 0.90736342 0.91962175 0.91190476 0.90182648 0.91055046
|
|
0.90022676 0.90780142 0.9187935 0.9047619 ]
|
|
|
|
mean value: 0.9103332386237517
|
|
|
|
key: test_recall
|
|
value: [0.82222222 0.97777778 0.95555556 0.95555556 0.97777778 0.97777778
|
|
1. 0.90909091 0.95555556 0.86666667]
|
|
|
|
mean value: 0.9397979797979799
|
|
|
|
key: train_recall
|
|
value: [0.94789082 0.94789082 0.96526055 0.95037221 0.98014888 0.98511166
|
|
0.98267327 0.95049505 0.98263027 0.94292804]
|
|
|
|
mean value: 0.963540156745203
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.92222222 0.87777778 0.9 0.91111111 0.93333333
|
|
0.93333333 0.8989899 0.89823232 0.88787879]
|
|
|
|
mean value: 0.8962878787878789
|
|
|
|
key: train_roc_auc
|
|
value: [0.93300248 0.92555831 0.94044665 0.9292804 0.93672457 0.94416873
|
|
0.93674606 0.92686043 0.9479983 0.92195907]
|
|
|
|
mean value: 0.9342745006510577
|
|
|
|
key: test_jcc
|
|
value: [0.67272727 0.8627451 0.7962963 0.82692308 0.84615385 0.88
|
|
0.88 0.81632653 0.82692308 0.79591837]
|
|
|
|
mean value: 0.8204013565021968
|
|
|
|
key: train_jcc
|
|
value: [0.87614679 0.86425339 0.89016018 0.87045455 0.88565022 0.89819005
|
|
0.88616071 0.86681716 0.90410959 0.85778781]
|
|
|
|
mean value: 0.8799730450107771
|
|
|
|
MCC on Blind test: -0.09
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.32299876 1.0416944 2.5182004 1.80774093 1.76536298 1.31071472
|
|
2.03246307 1.74967217 1.39646125 2.68330717]
|
|
|
|
mean value: 1.7628615856170655
|
|
|
|
key: score_time
|
|
value: [0.01254606 0.01260376 0.01270628 0.01266837 0.01263642 0.01265955
|
|
0.01518583 0.01284623 0.01271105 0.01276922]
|
|
|
|
mean value: 0.01293327808380127
|
|
|
|
key: test_mcc
|
|
value: [0.73405869 0.89442719 0.93541435 0.91473203 0.85485041 0.87447463
|
|
0.97777778 0.93282828 0.93282828 0.95599503]
|
|
|
|
mean value: 0.9007386680314277
|
|
|
|
key: train_mcc
|
|
value: [0.93810679 0.93743687 0.96824584 0.97306727 0.96584353 0.88275107
|
|
0.99505556 0.95581466 0.97274157 0.98768443]
|
|
|
|
mean value: 0.9576747563223209
|
|
|
|
key: test_accuracy
|
|
value: [0.86666667 0.94444444 0.96666667 0.95555556 0.92222222 0.93333333
|
|
0.98876404 0.96629213 0.96629213 0.97752809]
|
|
|
|
mean value: 0.9487765293383271
|
|
|
|
key: train_accuracy
|
|
value: [0.96898263 0.96774194 0.98387097 0.98635236 0.98263027 0.93796526
|
|
0.99752169 0.97769517 0.98636927 0.99380421]
|
|
|
|
mean value: 0.9782933758890109
|
|
|
|
key: test_fscore
|
|
value: [0.86363636 0.94736842 0.96774194 0.95744681 0.92783505 0.9375
|
|
0.98876404 0.96629213 0.96629213 0.97826087]
|
|
|
|
mean value: 0.9501137764401856
|
|
|
|
key: train_fscore
|
|
value: [0.96871089 0.96875 0.98412698 0.98653611 0.98292683 0.94158879
|
|
0.99753086 0.97738693 0.98636927 0.99383477]
|
|
|
|
mean value: 0.9787761434418515
|
|
|
|
key: test_precision
|
|
value: [0.88372093 0.9 0.9375 0.91836735 0.86538462 0.88235294
|
|
0.97777778 0.95555556 0.97727273 0.95744681]
|
|
|
|
mean value: 0.9255378702849119
|
|
|
|
key: train_precision
|
|
value: [0.97727273 0.93939394 0.96875 0.97342995 0.96642686 0.88962472
|
|
0.99507389 0.99234694 0.98514851 0.9877451 ]
|
|
|
|
mean value: 0.9675212644224315
|
|
|
|
key: test_recall
|
|
value: [0.84444444 1. 1. 1. 1. 1.
|
|
1. 0.97727273 0.95555556 1. ]
|
|
|
|
mean value: 0.9777272727272728
|
|
|
|
key: train_recall
|
|
value: [0.96029777 1. 1. 1. 1. 1.
|
|
1. 0.96287129 0.98759305 1. ]
|
|
|
|
mean value: 0.9910762105987274
|
|
|
|
key: test_roc_auc
|
|
value: [0.86666667 0.94444444 0.96666667 0.95555556 0.92222222 0.93333333
|
|
0.98888889 0.96641414 0.96641414 0.97727273]
|
|
|
|
mean value: 0.9487878787878787
|
|
|
|
key: train_roc_auc
|
|
value: [0.96898263 0.96774194 0.98387097 0.98635236 0.98263027 0.93796526
|
|
0.99751861 0.97771356 0.98637078 0.99381188]
|
|
|
|
mean value: 0.9782958258605017
|
|
|
|
key: test_jcc
|
|
value: [0.76 0.9 0.9375 0.91836735 0.86538462 0.88235294
|
|
0.97777778 0.93478261 0.93478261 0.95744681]
|
|
|
|
mean value: 0.9068394707179582
|
|
|
|
key: train_jcc
|
|
value: [0.93932039 0.93939394 0.96875 0.97342995 0.96642686 0.88962472
|
|
0.99507389 0.95577396 0.97310513 0.9877451 ]
|
|
|
|
mean value: 0.958864394192239
|
|
|
|
MCC on Blind test: -0.05
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04548621 0.03724217 0.03563833 0.03454995 0.03853965 0.03229499
|
|
0.0399425 0.03496361 0.03664589 0.03590989]
|
|
|
|
mean value: 0.03712131977081299
|
|
|
|
key: score_time
|
|
value: [0.00990534 0.00993133 0.00928736 0.00924993 0.00990772 0.00989246
|
|
0.009763 0.01024699 0.01019883 0.01022339]
|
|
|
|
mean value: 0.009860634803771973
|
|
|
|
key: test_mcc
|
|
value: [0.97801929 0.87447463 0.95650071 0.97801929 0.91473203 0.91473203
|
|
0.91388467 0.95603853 0.95599503 0.95599503]
|
|
|
|
mean value: 0.9398391256660623
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98888889 0.93333333 0.97777778 0.98888889 0.95555556 0.95555556
|
|
0.95505618 0.97752809 0.97752809 0.97752809]
|
|
|
|
mean value: 0.9687640449438202
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98901099 0.9375 0.97826087 0.98901099 0.95744681 0.95744681
|
|
0.95652174 0.97777778 0.97826087 0.97826087]
|
|
|
|
mean value: 0.969949772064712
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[0.97826087 0.88235294 0.95744681 0.97826087 0.91836735 0.91836735
|
|
0.91666667 0.95652174 0.95744681 0.95744681]
|
|
|
|
mean value: 0.9421138205513473
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98888889 0.93333333 0.97777778 0.98888889 0.95555556 0.95555556
|
|
0.95555556 0.97777778 0.97727273 0.97727273]
|
|
|
|
mean value: 0.9687878787878788
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.97826087 0.88235294 0.95744681 0.97826087 0.91836735 0.91836735
|
|
0.91666667 0.95652174 0.95744681 0.95744681]
|
|
|
|
mean value: 0.9421138205513473
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.15255928 0.15695834 0.13940191 0.13866329 0.14174914 0.14894533
|
|
0.14612293 0.13823032 0.13658357 0.13851357]
|
|
|
|
mean value: 0.1437727689743042
|
|
|
|
key: score_time
|
|
value: [0.02015829 0.02042937 0.01905417 0.0188024 0.01905012 0.02007651
|
|
0.02382255 0.01960826 0.01947641 0.01860738]
|
|
|
|
mean value: 0.019908547401428223
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.97777778 0.97777778 0.97776667 1. ]
|
|
|
|
mean value: 0.9933322229146034
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.98876404 0.98876404 0.98876404 1. ]
|
|
|
|
mean value: 0.996629213483146
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.98876404 0.98876404 0.98901099 1. ]
|
|
|
|
mean value: 0.996653907889863
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.97777778 0.97777778 0.97826087 1. ]
|
|
|
|
mean value: 0.9933816425120773
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.98888889 0.98888889 0.98863636 1. ]
|
|
|
|
mean value: 0.9966414141414142
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.97777778 0.97777778 0.97826087 1. ]
|
|
|
|
mean value: 0.9933816425120773
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01208663 0.01212192 0.01155543 0.01209855 0.01162434 0.01164389
|
|
0.01156378 0.0123167 0.01186776 0.01177359]
|
|
|
|
mean value: 0.01186525821685791
|
|
|
|
key: score_time
|
|
value: [0.00910425 0.00969696 0.009341 0.00939822 0.00994444 0.00906491
|
|
0.00905013 0.00927544 0.0091598 0.00912666]
|
|
|
|
mean value: 0.009316182136535645
|
|
|
|
key: test_mcc
|
|
value: [0.89442719 0.95650071 0.93541435 0.95650071 0.91473203 0.95650071
|
|
0.97777778 0.95603853 0.87294449 0.91371736]
|
|
|
|
mean value: 0.9334553865958252
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.94444444 0.97777778 0.96666667 0.97777778 0.95555556 0.97777778
|
|
0.98876404 0.97752809 0.93258427 0.95505618]
|
|
|
|
mean value: 0.9653932584269662
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.94736842 0.97826087 0.96774194 0.97826087 0.95744681 0.97826087
|
|
0.98876404 0.97777778 0.9375 0.95744681]
|
|
|
|
mean value: 0.966882840497503
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9 0.95744681 0.9375 0.95744681 0.91836735 0.95744681
|
|
0.97777778 0.95652174 0.88235294 0.91836735]
|
|
|
|
mean value: 0.9363227577494149
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.94444444 0.97777778 0.96666667 0.97777778 0.95555556 0.97777778
|
|
0.98888889 0.97777778 0.93181818 0.95454545]
|
|
|
|
mean value: 0.9653030303030303
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9 0.95744681 0.9375 0.95744681 0.91836735 0.95744681
|
|
0.97777778 0.95652174 0.88235294 0.91836735]
|
|
|
|
mean value: 0.9363227577494149
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.97941923 1.9350667 1.93848228 1.93526959 1.95086288 1.95586967
|
|
1.95583773 1.94122505 2.02016449 1.96089768]
|
|
|
|
mean value: 1.9573095321655274
|
|
|
|
key: score_time
|
|
value: [0.09524894 0.09504771 0.09499717 0.10355234 0.09763241 0.09732175
|
|
0.09709549 0.1024034 0.09971023 0.09767818]
|
|
|
|
mean value: 0.09806876182556153
|
|
|
|
key: test_mcc
|
|
value: [1. 0.97801929 0.97801929 0.97801929 1. 0.97801929
|
|
0.95603853 1. 0.95599503 1. ]
|
|
|
|
mean value: 0.9824110731525098
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.98888889 0.98888889 0.98888889 1. 0.98888889
|
|
0.97752809 1. 0.97752809 1. ]
|
|
|
|
mean value: 0.9910611735330837
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.98901099 0.98901099 0.98901099 1. 0.98901099
|
|
0.97777778 1. 0.97826087 1. ]
|
|
|
|
mean value: 0.9912082603386951
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.97826087 0.97826087 0.97826087 1. 0.97826087
|
|
0.95652174 1. 0.95744681 1. ]
|
|
|
|
mean value: 0.9827012025901942
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.98888889 0.98888889 0.98888889 1. 0.98888889
|
|
0.97777778 1. 0.97727273 1. ]
|
|
|
|
mean value: 0.9910606060606061
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.97826087 0.97826087 0.97826087 1. 0.97826087
|
|
0.95652174 1. 0.95744681 1. ]
|
|
|
|
mean value: 0.9827012025901942
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.99406123 1.08811426 1.03100777 1.03497696 1.066293 1.0855794
|
|
1.03748584 1.13571644 1.06132603 1.03552103]
|
|
|
|
mean value: 1.057008194923401
|
|
|
|
key: score_time
|
|
value: [0.19549179 0.27708125 0.27080369 0.29517531 0.29878497 0.2632792
|
|
0.25412655 0.26967049 0.28253484 0.20891333]
|
|
|
|
mean value: 0.2615861415863037
|
|
|
|
key: test_mcc
|
|
value: [1. 0.97801929 0.95650071 0.95650071 1. 0.95650071
|
|
0.95603853 1. 0.95599503 0.97776667]
|
|
|
|
mean value: 0.9737321667370454
|
|
|
|
key: train_mcc
|
|
value: [0.98277854 0.98277854 0.98766907 0.98522086 0.98277854 0.98522086
|
|
0.99013526 0.98768405 0.9901355 0.98523929]
|
|
|
|
mean value: 0.9859640515596869
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.98888889 0.97777778 0.97777778 1. 0.97777778
|
|
0.97752809 1. 0.97752809 0.98876404]
|
|
|
|
mean value: 0.9866042446941323
|
|
|
|
key: train_accuracy
|
|
value: [0.99131514 0.99131514 0.99379653 0.99255583 0.99131514 0.99255583
|
|
0.99504337 0.99380421 0.99504337 0.99256506]
|
|
|
|
mean value: 0.9929309607928147
|
|
|
|
key: test_fscore
|
|
value: [1. 0.98901099 0.97826087 0.97826087 1. 0.97826087
|
|
0.97777778 1. 0.97826087 0.98901099]
|
|
|
|
mean value: 0.9868843234060626
|
|
|
|
key: train_fscore
|
|
value: [0.99138991 0.99138991 0.99383477 0.99261084 0.99138991 0.99261084
|
|
0.99507389 0.99384994 0.99506173 0.99261084]
|
|
|
|
mean value: 0.992982258441931
|
|
|
|
key: test_precision
|
|
value: [1. 0.97826087 0.95744681 0.95744681 1. 0.95744681
|
|
0.95652174 1. 0.95744681 0.97826087]
|
|
|
|
mean value: 0.9742830712303423
|
|
|
|
key: train_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.98292683 0.98292683 0.9877451 0.98533007 0.98292683 0.98533007
|
|
0.99019608 0.98777506 0.99017199 0.98533007]
|
|
|
|
mean value: 0.9860658935621051
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.98888889 0.97777778 0.97777778 1. 0.97777778
|
|
0.97777778 1. 0.97727273 0.98863636]
|
|
|
|
mean value: 0.9865909090909091
|
|
|
|
key: train_roc_auc
|
|
value: [0.99131514 0.99131514 0.99379653 0.99255583 0.99131514 0.99255583
|
|
0.99503722 0.99379653 0.9950495 0.99257426]
|
|
|
|
mean value: 0.9929311107289389
|
|
|
|
key: test_jcc
|
|
value: [1. 0.97826087 0.95744681 0.95744681 1. 0.95744681
|
|
0.95652174 1. 0.95744681 0.97826087]
|
|
|
|
mean value: 0.9742830712303423
|
|
|
|
key: train_jcc
|
|
value: [0.98292683 0.98292683 0.9877451 0.98533007 0.98292683 0.98533007
|
|
0.99019608 0.98777506 0.99017199 0.98533007]
|
|
|
|
mean value: 0.9860658935621051
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01308751 0.01592898 0.01608396 0.01611495 0.01604581 0.01637912
|
|
0.01636076 0.0162065 0.01660514 0.01611423]
|
|
|
|
mean value: 0.015892696380615235
|
|
|
|
key: score_time
|
|
value: [0.01197648 0.01233864 0.01245236 0.01234746 0.01242995 0.01240921
|
|
0.0123353 0.01237273 0.012398 0.01241159]
|
|
|
|
mean value: 0.012347173690795899
|
|
|
|
key: test_mcc
|
|
value: [0.28284271 0.51854497 0.60540551 0.5500191 0.5280169 0.52421865
|
|
0.58600584 0.3518082 0.45063372 0.49157739]
|
|
|
|
mean value: 0.4889073004048967
|
|
|
|
key: train_mcc
|
|
value: [0.54126588 0.51549131 0.50604987 0.51844604 0.50371456 0.51263006
|
|
0.4413118 0.53137679 0.49612078 0.51185264]
|
|
|
|
mean value: 0.5078259736044609
|
|
|
|
key: test_accuracy
|
|
value: [0.63333333 0.74444444 0.8 0.76666667 0.74444444 0.75555556
|
|
0.78651685 0.66292135 0.71910112 0.73033708]
|
|
|
|
mean value: 0.7343320848938827
|
|
|
|
key: train_accuracy
|
|
value: [0.76054591 0.74813896 0.74193548 0.74937965 0.74193548 0.74565757
|
|
0.71623296 0.755886 0.74101611 0.74597274]
|
|
|
|
mean value: 0.7446700858800631
|
|
|
|
key: test_fscore
|
|
value: [0.68571429 0.78095238 0.8125 0.79207921 0.78504673 0.78
|
|
0.80412371 0.71153846 0.75247525 0.77358491]
|
|
|
|
mean value: 0.7678014929623219
|
|
|
|
key: train_fscore
|
|
value: [0.78907104 0.77814208 0.77489177 0.77947598 0.77342048 0.77741585
|
|
0.7424072 0.78516903 0.76803552 0.77644493]
|
|
|
|
mean value: 0.7744473877569741
|
|
|
|
key: test_precision
|
|
value: [0.6 0.68333333 0.76470588 0.71428571 0.67741935 0.70909091
|
|
0.73584906 0.61666667 0.67857143 0.67213115]
|
|
|
|
mean value: 0.685205349328446
|
|
|
|
key: train_precision
|
|
value: [0.70507812 0.6953125 0.68714012 0.69590643 0.68932039 0.69111969
|
|
0.68041237 0.70175439 0.69477912 0.692607 ]
|
|
|
|
mean value: 0.6933430129836738
|
|
|
|
key: test_recall
|
|
value: [0.8 0.91111111 0.86666667 0.88888889 0.93333333 0.86666667
|
|
0.88636364 0.84090909 0.84444444 0.91111111]
|
|
|
|
mean value: 0.8749494949494949
|
|
|
|
key: train_recall
|
|
value: [0.89578164 0.88337469 0.88833747 0.88585608 0.8808933 0.88833747
|
|
0.81683168 0.89108911 0.85856079 0.88337469]
|
|
|
|
mean value: 0.8772436921111466
|
|
|
|
key: test_roc_auc
|
|
value: [0.63333333 0.74444444 0.8 0.76666667 0.74444444 0.75555556
|
|
0.78762626 0.66489899 0.71767677 0.72828283]
|
|
|
|
mean value: 0.7342929292929293
|
|
|
|
key: train_roc_auc
|
|
value: [0.76054591 0.74813896 0.74193548 0.74937965 0.74193548 0.74565757
|
|
0.71610815 0.75571825 0.74116159 0.74614279]
|
|
|
|
mean value: 0.7446723828710414
|
|
|
|
key: test_jcc
|
|
value: [0.52173913 0.640625 0.68421053 0.6557377 0.64615385 0.63934426
|
|
0.67241379 0.55223881 0.6031746 0.63076923]
|
|
|
|
mean value: 0.6246406903134964
|
|
|
|
key: train_jcc
|
|
value: [0.65162455 0.63685152 0.63250883 0.63864043 0.63055062 0.63587922
|
|
0.59033989 0.64631957 0.62342342 0.63458111]
|
|
|
|
mean value: 0.6320719163089898
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.19334912 0.0987041 0.09737754 0.09836841 0.09884167 0.09638548
|
|
0.09560966 0.09962988 0.09771371 0.21906543]
|
|
|
|
mean value: 0.11950449943542481
|
|
|
|
key: score_time
|
|
value: [0.01154375 0.0123136 0.01152229 0.01115894 0.01118851 0.01113868
|
|
0.01159239 0.01117778 0.01116967 0.01107574]
|
|
|
|
mean value: 0.011388134956359864
|
|
|
|
key: test_mcc
|
|
value: [1. 0.93541435 0.93541435 0.97801929 0.95650071 0.91473203
|
|
0.9347507 0.95603853 0.91371736 0.97776667]
|
|
|
|
mean value: 0.9502353987276452
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.96666667 0.96666667 0.98888889 0.97777778 0.95555556
|
|
0.96629213 0.97752809 0.95505618 0.98876404]
|
|
|
|
mean value: 0.9743196004993758
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.96774194 0.96774194 0.98901099 0.97826087 0.95744681
|
|
0.96703297 0.97777778 0.95744681 0.98901099]
|
|
|
|
mean value: 0.9751471080386959
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.9375 0.9375 0.97826087 0.95744681 0.91836735
|
|
0.93617021 0.95652174 0.91836735 0.97826087]
|
|
|
|
mean value: 0.9518395193415017
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.96666667 0.96666667 0.98888889 0.97777778 0.95555556
|
|
0.96666667 0.97777778 0.95454545 0.98863636]
|
|
|
|
mean value: 0.9743181818181819
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.9375 0.9375 0.97826087 0.95744681 0.91836735
|
|
0.93617021 0.95652174 0.91836735 0.97826087]
|
|
|
|
mean value: 0.9518395193415017
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.05069423 0.07617974 0.05460596 0.0779562 0.0546186 0.07746673
|
|
0.06075573 0.13045573 0.09342909 0.08180666]
|
|
|
|
mean value: 0.07579686641693115
|
|
|
|
key: score_time
|
|
value: [0.01877713 0.01299071 0.01298523 0.03048539 0.01936769 0.01325297
|
|
0.01332688 0.03067303 0.01894212 0.0123508 ]
|
|
|
|
mean value: 0.01831519603729248
|
|
|
|
key: test_mcc
|
|
value: [0.80498447 0.83553169 0.83553169 0.87447463 0.89442719 0.77113566
|
|
0.77746025 0.86879834 0.85305908 0.79939579]
|
|
|
|
mean value: 0.831479879780609
|
|
|
|
key: train_mcc
|
|
value: [0.88712152 0.88275107 0.88635131 0.87944648 0.87030254 0.87985069
|
|
0.87957892 0.88684987 0.88419706 0.89682326]
|
|
|
|
mean value: 0.8833272715253092
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.91111111 0.91111111 0.93333333 0.94444444 0.87777778
|
|
0.87640449 0.93258427 0.92134831 0.8988764 ]
|
|
|
|
mean value: 0.9106991260923845
|
|
|
|
key: train_accuracy
|
|
value: [0.94168734 0.93796526 0.94168734 0.93796526 0.93300248 0.93796526
|
|
0.93804213 0.9417596 0.94052045 0.94671623]
|
|
|
|
mean value: 0.9397311366732161
|
|
|
|
key: test_fscore
|
|
value: [0.90526316 0.91836735 0.91836735 0.9375 0.94736842 0.88888889
|
|
0.88888889 0.93478261 0.92783505 0.90322581]
|
|
|
|
mean value: 0.9170487517296354
|
|
|
|
key: train_fscore
|
|
value: [0.94424674 0.94158879 0.94398093 0.94061758 0.93617021 0.94075829
|
|
0.94075829 0.94424674 0.94285714 0.94887039]
|
|
|
|
mean value: 0.9424095103294992
|
|
|
|
key: test_precision
|
|
value: [0.86 0.8490566 0.8490566 0.88235294 0.9 0.81481481
|
|
0.8 0.89583333 0.86538462 0.875 ]
|
|
|
|
mean value: 0.8591498912256404
|
|
|
|
key: train_precision
|
|
value: [0.90454545 0.88962472 0.90825688 0.90205011 0.89390519 0.90022676
|
|
0.90227273 0.90660592 0.90617849 0.9109589 ]
|
|
|
|
mean value: 0.9024625166115716
|
|
|
|
key: test_recall
|
|
value: [0.95555556 1. 1. 1. 1. 0.97777778
|
|
1. 0.97727273 1. 0.93333333]
|
|
|
|
mean value: 0.9843939393939394
|
|
|
|
key: train_recall
|
|
value: [0.98759305 1. 0.98263027 0.98263027 0.98263027 0.98511166
|
|
0.98267327 0.98514851 0.98263027 0.99007444]
|
|
|
|
mean value: 0.9861122030317175
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.91111111 0.91111111 0.93333333 0.94444444 0.87777778
|
|
0.87777778 0.93308081 0.92045455 0.89848485]
|
|
|
|
mean value: 0.9107575757575758
|
|
|
|
key: train_roc_auc
|
|
value: [0.94168734 0.93796526 0.94168734 0.93796526 0.93300248 0.93796526
|
|
0.93798676 0.94170577 0.94057256 0.94676989]
|
|
|
|
mean value: 0.9397307937989828
|
|
|
|
key: test_jcc
|
|
value: [0.82692308 0.8490566 0.8490566 0.88235294 0.9 0.8
|
|
0.8 0.87755102 0.86538462 0.82352941]
|
|
|
|
mean value: 0.8473854273204202
|
|
|
|
key: train_jcc
|
|
value: [0.89438202 0.88962472 0.89390519 0.88789238 0.88 0.88814318
|
|
0.88814318 0.89438202 0.89189189 0.90271493]
|
|
|
|
mean value: 0.8911079515046985
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0145371 0.01495004 0.01486659 0.01497698 0.01494718 0.01497436
|
|
0.01491857 0.01487398 0.01495814 0.0148921 ]
|
|
|
|
mean value: 0.01488950252532959
|
|
|
|
key: score_time
|
|
value: [0.01198578 0.01184559 0.01186609 0.01184559 0.01187658 0.01186061
|
|
0.0119071 0.01186728 0.01185274 0.01183295]
|
|
|
|
mean value: 0.011874032020568848
|
|
|
|
key: test_mcc
|
|
value: [0.15695699 0.36288737 0.31304952 0.27500955 0.31752645 0.26832816
|
|
0.3297806 0.0611649 0.37076421 0.33260332]
|
|
|
|
mean value: 0.278807105577507
|
|
|
|
key: train_mcc
|
|
value: [0.30123762 0.31553888 0.27993821 0.279441 0.30871932 0.32626876
|
|
0.29632657 0.29075481 0.27310093 0.27307145]
|
|
|
|
mean value: 0.2944397534078397
|
|
|
|
key: test_accuracy
|
|
value: [0.57777778 0.67777778 0.65555556 0.63333333 0.65555556 0.63333333
|
|
0.66292135 0.52808989 0.68539326 0.66292135]
|
|
|
|
mean value: 0.6372659176029962
|
|
|
|
key: train_accuracy
|
|
value: [0.64888337 0.65508685 0.63771712 0.63771712 0.65136476 0.66004963
|
|
0.64684015 0.64312268 0.63568773 0.63444857]
|
|
|
|
mean value: 0.6450917991150633
|
|
|
|
key: test_fscore
|
|
value: [0.60416667 0.70707071 0.67368421 0.67326733 0.68686869 0.65263158
|
|
0.68085106 0.58 0.69565217 0.7 ]
|
|
|
|
mean value: 0.6654192414555249
|
|
|
|
key: train_fscore
|
|
value: [0.67358708 0.68409091 0.66742597 0.66590389 0.68248588 0.69004525
|
|
0.66975666 0.67272727 0.65411765 0.66285714]
|
|
|
|
mean value: 0.6722997699274416
|
|
|
|
key: test_precision
|
|
value: [0.56862745 0.64814815 0.64 0.60714286 0.62962963 0.62
|
|
0.64 0.51785714 0.68085106 0.63636364]
|
|
|
|
mean value: 0.6188619928951593
|
|
|
|
key: train_precision
|
|
value: [0.62931034 0.63102725 0.61684211 0.61783439 0.62655602 0.63409563
|
|
0.62962963 0.62184874 0.62192394 0.61440678]
|
|
|
|
mean value: 0.6243474835503734
|
|
|
|
key: test_recall
|
|
value: [0.64444444 0.77777778 0.71111111 0.75555556 0.75555556 0.68888889
|
|
0.72727273 0.65909091 0.71111111 0.77777778]
|
|
|
|
mean value: 0.7208585858585859
|
|
|
|
key: train_recall
|
|
value: [0.72456576 0.74689826 0.72704715 0.72208437 0.74937965 0.75682382
|
|
0.71534653 0.73267327 0.6898263 0.71960298]
|
|
|
|
mean value: 0.7284248089821389
|
|
|
|
key: test_roc_auc
|
|
value: [0.57777778 0.67777778 0.65555556 0.63333333 0.65555556 0.63333333
|
|
0.66363636 0.52954545 0.68510101 0.66161616]
|
|
|
|
mean value: 0.6373232323232323
|
|
|
|
key: train_roc_auc
|
|
value: [0.64888337 0.65508685 0.63771712 0.63771712 0.65136476 0.66004963
|
|
0.64675515 0.64301157 0.63575474 0.63455396]
|
|
|
|
mean value: 0.6450894282976685
|
|
|
|
key: test_jcc
|
|
value: [0.43283582 0.546875 0.50793651 0.50746269 0.52307692 0.484375
|
|
0.51612903 0.4084507 0.53333333 0.53846154]
|
|
|
|
mean value: 0.4998936546754406
|
|
|
|
key: train_jcc
|
|
value: [0.50782609 0.51986183 0.5008547 0.49914237 0.51801029 0.52677029
|
|
0.50348432 0.50684932 0.48601399 0.4957265 ]
|
|
|
|
mean value: 0.5064539688192113
|
|
|
|
MCC on Blind test: -0.08
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03323388 0.02752995 0.03028727 0.0292933 0.02535892 0.03237891
|
|
0.03820801 0.03229928 0.03019404 0.02762794]
|
|
|
|
mean value: 0.03064115047454834
|
|
|
|
key: score_time
|
|
value: [0.01182437 0.01198697 0.01192284 0.01188111 0.01183653 0.01187468
|
|
0.01186013 0.01197815 0.0118649 0.01188087]
|
|
|
|
mean value: 0.011891055107116699
|
|
|
|
key: test_mcc
|
|
value: [0.71554175 0.84465303 0.78478493 0.82548988 0.76088591 0.70710678
|
|
0.77746025 0.81495457 0.73300437 0.82112188]
|
|
|
|
mean value: 0.7785003350385149
|
|
|
|
key: train_mcc
|
|
value: [0.85991073 0.85624813 0.78272262 0.838254 0.83049832 0.82411569
|
|
0.8169997 0.72582633 0.88854218 0.83795659]
|
|
|
|
mean value: 0.8261074282969967
|
|
|
|
key: test_accuracy
|
|
value: [0.85555556 0.92222222 0.88888889 0.91111111 0.86666667 0.83333333
|
|
0.87640449 0.8988764 0.86516854 0.91011236]
|
|
|
|
mean value: 0.8828339575530587
|
|
|
|
key: train_accuracy
|
|
value: [0.9280397 0.9280397 0.88833747 0.91811414 0.90818859 0.9044665
|
|
0.90334572 0.84510533 0.94423792 0.9157373 ]
|
|
|
|
mean value: 0.9083612374354669
|
|
|
|
key: test_fscore
|
|
value: [0.86315789 0.92307692 0.89583333 0.90697674 0.88235294 0.85714286
|
|
0.88888889 0.90721649 0.87234043 0.90909091]
|
|
|
|
mean value: 0.8906077412009547
|
|
|
|
key: train_fscore
|
|
value: [0.93127962 0.9273183 0.89485981 0.9151671 0.91590909 0.91279728
|
|
0.91055046 0.86602358 0.94382022 0.92056075]
|
|
|
|
mean value: 0.9138286208642713
|
|
|
|
key: test_precision
|
|
value: [0.82 0.91304348 0.84313725 0.95121951 0.78947368 0.75
|
|
0.8 0.83018868 0.83673469 0.93023256]
|
|
|
|
mean value: 0.8464029860830847
|
|
|
|
key: train_precision
|
|
value: [0.89115646 0.93670886 0.84547461 0.94933333 0.84486373 0.83958333
|
|
0.8482906 0.7637051 0.94974874 0.86975717]
|
|
|
|
mean value: 0.8738621955725795
|
|
|
|
key: test_recall
|
|
value: [0.91111111 0.93333333 0.95555556 0.86666667 1. 1.
|
|
1. 1. 0.91111111 0.88888889]
|
|
|
|
mean value: 0.9466666666666667
|
|
|
|
key: train_recall
|
|
value: [0.9751861 0.91811414 0.95037221 0.88337469 1. 1.
|
|
0.98267327 1. 0.93796526 0.97766749]
|
|
|
|
mean value: 0.9625353168071149
|
|
|
|
key: test_roc_auc
|
|
value: [0.85555556 0.92222222 0.88888889 0.91111111 0.86666667 0.83333333
|
|
0.87777778 0.9 0.86464646 0.91035354]
|
|
|
|
mean value: 0.8830555555555556
|
|
|
|
key: train_roc_auc
|
|
value: [0.9280397 0.9280397 0.88833747 0.91811414 0.90818859 0.9044665
|
|
0.9032473 0.84491315 0.94423016 0.91581394]
|
|
|
|
mean value: 0.9083390659165173
|
|
|
|
key: test_jcc
|
|
value: [0.75925926 0.85714286 0.81132075 0.82978723 0.78947368 0.75
|
|
0.8 0.83018868 0.77358491 0.83333333]
|
|
|
|
mean value: 0.8034090707611171
|
|
|
|
key: train_jcc
|
|
value: [0.8713969 0.86448598 0.80972516 0.8436019 0.84486373 0.83958333
|
|
0.83578947 0.7637051 0.89361702 0.85281385]
|
|
|
|
mean value: 0.8419582448126447
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02210283 0.03469229 0.03090715 0.02344179 0.02792764 0.02314782
|
|
0.03058457 0.02959204 0.02414179 0.02906966]
|
|
|
|
mean value: 0.02756075859069824
|
|
|
|
key: score_time
|
|
value: [0.01034188 0.01189733 0.01190758 0.01188159 0.01188207 0.01186109
|
|
0.01192474 0.01181865 0.01189327 0.01181841]
|
|
|
|
mean value: 0.011722660064697266
|
|
|
|
key: test_mcc
|
|
value: [0.76088591 0.79772404 0.83553169 0.48107024 0.89442719 0.73624773
|
|
0.79608094 0.79969743 0.72215913 0.86515152]
|
|
|
|
mean value: 0.7688975813754446
|
|
|
|
key: train_mcc
|
|
value: [0.83049832 0.81776345 0.87251635 0.52227446 0.88497788 0.8437374
|
|
0.88732582 0.85950631 0.87635703 0.81914807]
|
|
|
|
mean value: 0.8214105096168994
|
|
|
|
key: test_accuracy
|
|
value: [0.86666667 0.88888889 0.91111111 0.7 0.94444444 0.86666667
|
|
0.88764045 0.8988764 0.85393258 0.93258427]
|
|
|
|
mean value: 0.8750811485642946
|
|
|
|
key: train_accuracy
|
|
value: [0.90818859 0.90074442 0.93548387 0.72208437 0.93920596 0.92183623
|
|
0.94052045 0.92936803 0.93804213 0.90334572]
|
|
|
|
mean value: 0.9038819756411793
|
|
|
|
key: test_fscore
|
|
value: [0.88235294 0.9 0.91836735 0.58461538 0.94736842 0.87234043
|
|
0.89795918 0.9010989 0.86868687 0.93333333]
|
|
|
|
mean value: 0.870612280610775
|
|
|
|
key: train_fscore
|
|
value: [0.91590909 0.90970655 0.9373494 0.62289562 0.94269006 0.92134831
|
|
0.94392523 0.92793932 0.93872549 0.9109589 ]
|
|
|
|
mean value: 0.8971447976027119
|
|
|
|
key: test_precision
|
|
value: [0.78947368 0.81818182 0.8490566 0.95 0.9 0.83673469
|
|
0.81481481 0.87234043 0.7962963 0.93333333]
|
|
|
|
mean value: 0.856023167001984
|
|
|
|
key: train_precision
|
|
value: [0.84486373 0.83436853 0.91100703 0.96858639 0.89159292 0.92713568
|
|
0.89380531 0.94832041 0.92736077 0.8435518 ]
|
|
|
|
mean value: 0.8990592568648286
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.42222222 1. 0.91111111
|
|
1. 0.93181818 0.95555556 0.93333333]
|
|
|
|
mean value: 0.9154040404040404
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.96526055 0.45905707 1. 0.91563275
|
|
1. 0.90841584 0.95037221 0.99007444]
|
|
|
|
mean value: 0.9188812863916664
|
|
|
|
key: test_roc_auc
|
|
value: [0.86666667 0.88888889 0.91111111 0.7 0.94444444 0.86666667
|
|
0.88888889 0.89924242 0.85277778 0.93257576]
|
|
|
|
mean value: 0.8751262626262626
|
|
|
|
key: train_roc_auc
|
|
value: [0.90818859 0.90074442 0.93548387 0.72208437 0.93920596 0.92183623
|
|
0.94044665 0.92939403 0.93805739 0.90345306]
|
|
|
|
mean value: 0.9038894553227035
|
|
|
|
key: test_jcc
|
|
value: [0.78947368 0.81818182 0.8490566 0.41304348 0.9 0.77358491
|
|
0.81481481 0.82 0.76785714 0.875 ]
|
|
|
|
mean value: 0.7821012447759134
|
|
|
|
key: train_jcc
|
|
value: [0.84486373 0.83436853 0.88208617 0.45232274 0.89159292 0.85416667
|
|
0.89380531 0.86556604 0.88452656 0.83647799]
|
|
|
|
mean value: 0.82397766486675
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.21388268 0.21028662 0.21001744 0.20961165 0.21119356 0.21068764
|
|
0.21117711 0.21060681 0.21195483 0.21123028]
|
|
|
|
mean value: 0.21106486320495604
|
|
|
|
key: score_time
|
|
value: [0.0154357 0.01552796 0.01554179 0.01553774 0.01558185 0.01567173
|
|
0.01551652 0.01555777 0.01540995 0.01554251]
|
|
|
|
mean value: 0.015532350540161133
|
|
|
|
key: test_mcc
|
|
value: [0.95650071 0.89442719 0.93541435 0.91473203 0.97801929 0.93541435
|
|
0.91388467 0.9347507 0.89315579 0.97776667]
|
|
|
|
mean value: 0.9334065758718476
|
|
|
|
key: train_mcc
|
|
value: [0.98522086 0.98766907 0.98277854 0.97791139 0.98522086 0.98766907
|
|
0.98523875 0.99752474 0.9901355 0.9901355 ]
|
|
|
|
mean value: 0.9869504280293591
|
|
|
|
key: test_accuracy
|
|
value: [0.97777778 0.94444444 0.96666667 0.95555556 0.98888889 0.96666667
|
|
0.95505618 0.96629213 0.94382022 0.98876404]
|
|
|
|
mean value: 0.9653932584269663
|
|
|
|
key: train_accuracy
|
|
value: [0.99255583 0.99379653 0.99131514 0.98883375 0.99255583 0.99379653
|
|
0.99256506 0.99876084 0.99504337 0.99504337]
|
|
|
|
mean value: 0.9934266237420093
|
|
|
|
key: test_fscore
|
|
value: [0.97826087 0.94736842 0.96774194 0.95744681 0.98901099 0.96774194
|
|
0.95652174 0.96703297 0.94736842 0.98901099]
|
|
|
|
mean value: 0.9667505075334241
|
|
|
|
key: train_fscore
|
|
value: [0.99261084 0.99383477 0.99138991 0.98895706 0.99261084 0.99383477
|
|
0.99262899 0.99876391 0.99506173 0.99506173]
|
|
|
|
mean value: 0.9934754543239807
|
|
|
|
key: test_precision
|
|
value: [0.95744681 0.9 0.9375 0.91836735 0.97826087 0.9375
|
|
0.91666667 0.93617021 0.9 0.97826087]
|
|
|
|
mean value: 0.9360172774012473
|
|
|
|
key: train_precision
|
|
value: [0.98533007 0.9877451 0.98292683 0.97815534 0.98533007 0.9877451
|
|
0.98536585 0.99753086 0.99017199 0.99017199]
|
|
|
|
mean value: 0.9870473210051864
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97777778 0.94444444 0.96666667 0.95555556 0.98888889 0.96666667
|
|
0.95555556 0.96666667 0.94318182 0.98863636]
|
|
|
|
mean value: 0.9654040404040404
|
|
|
|
key: train_roc_auc
|
|
value: [0.99255583 0.99379653 0.99131514 0.98883375 0.99255583 0.99379653
|
|
0.99255583 0.99875931 0.9950495 0.9950495 ]
|
|
|
|
mean value: 0.9934267744392304
|
|
|
|
key: test_jcc
|
|
value: [0.95744681 0.9 0.9375 0.91836735 0.97826087 0.9375
|
|
0.91666667 0.93617021 0.9 0.97826087]
|
|
|
|
mean value: 0.9360172774012473
|
|
|
|
key: train_jcc
|
|
value: [0.98533007 0.9877451 0.98292683 0.97815534 0.98533007 0.9877451
|
|
0.98536585 0.99753086 0.99017199 0.99017199]
|
|
|
|
mean value: 0.9870473210051864
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.95
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.19595027 0.10121179 0.19956589 0.18760133 0.19746017 0.21289349
|
|
0.19443989 0.20348549 0.2055757 0.20671153]
|
|
|
|
mean value: 0.1904895544052124
|
|
|
|
key: score_time
|
|
value: [0.03755951 0.03765368 0.03275847 0.03973651 0.03542471 0.04145002
|
|
0.03873539 0.0401361 0.02827811 0.03850675]
|
|
|
|
mean value: 0.03702392578125
|
|
|
|
key: test_mcc
|
|
value: [1. 0.89442719 0.93541435 0.91473203 0.93541435 0.93541435
|
|
0.9347507 0.9347507 0.95599503 0.95599503]
|
|
|
|
mean value: 0.9396893720409134
|
|
|
|
key: train_mcc
|
|
value: [0.99752168 0.99752168 0.99504947 0.99752168 0.99504947 0.99504947
|
|
0.99752474 1. 0.99752475 1. ]
|
|
|
|
mean value: 0.9972762957729306
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.94444444 0.96666667 0.95555556 0.96666667 0.96666667
|
|
0.96629213 0.96629213 0.97752809 0.97752809]
|
|
|
|
mean value: 0.9687640449438202
|
|
|
|
key: train_accuracy
|
|
value: [0.99875931 0.99875931 0.99751861 0.99875931 0.99751861 0.99751861
|
|
0.99876084 1. 0.99876084 1. ]
|
|
|
|
mean value: 0.9986355432152291
|
|
|
|
key: test_fscore
|
|
value: [1. 0.94736842 0.96774194 0.95744681 0.96774194 0.96774194
|
|
0.96703297 0.96703297 0.97826087 0.97826087]
|
|
|
|
mean value: 0.9698628709211252
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[0.99876084 0.99876084 0.99752475 0.99876084 0.99752475 0.99752475
|
|
0.99876391 1. 0.99876084 1. ]
|
|
|
|
mean value: 0.9986381533990657
|
|
|
|
key: test_precision
|
|
value: [1. 0.9 0.9375 0.91836735 0.9375 0.9375
|
|
0.93617021 0.93617021 0.95744681 0.95744681]
|
|
|
|
mean value: 0.9418101389491967
|
|
|
|
key: train_precision
|
|
value: [0.99752475 0.99752475 0.99506173 0.99752475 0.99506173 0.99506173
|
|
0.99753086 1. 0.99752475 1. ]
|
|
|
|
mean value: 0.9972815059283706
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.94444444 0.96666667 0.95555556 0.96666667 0.96666667
|
|
0.96666667 0.96666667 0.97727273 0.97727273]
|
|
|
|
mean value: 0.9687878787878788
|
|
|
|
key: train_roc_auc
|
|
value: [0.99875931 0.99875931 0.99751861 0.99875931 0.99751861 0.99751861
|
|
0.99875931 1. 0.99876238 1. ]
|
|
|
|
mean value: 0.9986355428346805
|
|
|
|
key: test_jcc
|
|
value: [1. 0.9 0.9375 0.91836735 0.9375 0.9375
|
|
0.93617021 0.93617021 0.95744681 0.95744681]
|
|
|
|
mean value: 0.9418101389491967
|
|
|
|
key: train_jcc
|
|
value: [0.99752475 0.99752475 0.99506173 0.99752475 0.99506173 0.99506173
|
|
0.99753086 1. 0.99752475 1. ]
|
|
|
|
mean value: 0.9972815059283706
|
|
|
|
MCC on Blind test: 0.54
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.40776467 0.43745208 0.45081449 0.36511922 0.33153629 0.39808536
|
|
0.34002995 0.34384251 0.36300969 0.36822796]
|
|
|
|
mean value: 0.3805882215499878
|
|
|
|
key: score_time
|
|
value: [0.03453422 0.01970077 0.0196085 0.01963782 0.01964974 0.01968241
|
|
0.01955247 0.03294897 0.01964808 0.03975558]
|
|
|
|
mean value: 0.024471855163574217
|
|
|
|
key: test_mcc
|
|
value: [0.79772404 0.91473203 0.87447463 0.85485041 0.85485041 0.93541435
|
|
0.9347507 0.89341253 0.91371736 0.87294449]
|
|
|
|
mean value: 0.8846870946572671
|
|
|
|
key: train_mcc
|
|
value: [0.98034206 0.97306727 0.97306727 0.97306727 0.97548647 0.97548647
|
|
0.97309927 0.9755157 0.9706906 0.97310106]
|
|
|
|
mean value: 0.9742923425574918
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 0.95555556 0.93333333 0.92222222 0.92222222 0.96666667
|
|
0.96629213 0.94382022 0.95505618 0.93258427]
|
|
|
|
mean value: 0.9386641697877653
|
|
|
|
key: train_accuracy
|
|
value: [0.99007444 0.98635236 0.98635236 0.98635236 0.98759305 0.98759305
|
|
0.98636927 0.98760843 0.98513011 0.98636927]
|
|
|
|
mean value: 0.9869794693454604
|
|
|
|
key: test_fscore
|
|
value: [0.9 0.95744681 0.9375 0.92783505 0.92783505 0.96774194
|
|
0.96703297 0.94623656 0.95744681 0.9375 ]
|
|
|
|
mean value: 0.9426575181770683
|
|
|
|
key: train_fscore
|
|
value: [0.99017199 0.98653611 0.98653611 0.98653611 0.9877451 0.9877451
|
|
0.98656899 0.98777506 0.98533007 0.98653611]
|
|
|
|
mean value: 0.987148073813829
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.91836735 0.88235294 0.86538462 0.86538462 0.9375
|
|
0.93617021 0.89795918 0.91836735 0.88235294]
|
|
|
|
mean value: 0.8922021021620968
|
|
|
|
key: train_precision
|
|
value: [0.98053528 0.97342995 0.97342995 0.97342995 0.97578692 0.97578692
|
|
0.97349398 0.97584541 0.97108434 0.97342995]
|
|
|
|
mean value: 0.9746252660328604
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.88888889 0.95555556 0.93333333 0.92222222 0.92222222 0.96666667
|
|
0.96666667 0.94444444 0.95454545 0.93181818]
|
|
|
|
mean value: 0.9386363636363636
|
|
|
|
key: train_roc_auc
|
|
value: [0.99007444 0.98635236 0.98635236 0.98635236 0.98759305 0.98759305
|
|
0.98635236 0.98759305 0.98514851 0.98638614]
|
|
|
|
mean value: 0.9869797680760631
|
|
|
|
key: test_jcc
|
|
value: [0.81818182 0.91836735 0.88235294 0.86538462 0.86538462 0.9375
|
|
0.93617021 0.89795918 0.91836735 0.88235294]
|
|
|
|
mean value: 0.8922021021620968
|
|
|
|
key: train_jcc
|
|
value: [0.98053528 0.97342995 0.97342995 0.97342995 0.97578692 0.97578692
|
|
0.97349398 0.97584541 0.97108434 0.97342995]
|
|
|
|
mean value: 0.9746252660328604
|
|
|
|
MCC on Blind test: -0.07
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.86635518 0.8551178 0.85468936 0.85569334 0.85759234 0.85957646
|
|
0.86946774 0.86246204 0.86470008 0.86424017]
|
|
|
|
mean value: 0.8609894514083862
|
|
|
|
key: score_time
|
|
value: [0.00959229 0.00925708 0.00936532 0.00937605 0.00956178 0.00943971
|
|
0.0094378 0.0093801 0.00957036 0.00927758]
|
|
|
|
mean value: 0.009425806999206542
|
|
|
|
key: test_mcc
|
|
value: [0.95650071 0.91473203 0.91473203 0.89442719 0.95650071 0.93541435
|
|
0.89341253 0.91388467 0.93465477 0.97776667]
|
|
|
|
mean value: 0.9292025674733508
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97777778 0.95555556 0.95555556 0.94444444 0.97777778 0.96666667
|
|
0.94382022 0.95505618 0.96629213 0.98876404]
|
|
|
|
mean value: 0.9631710362047441
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97826087 0.95744681 0.95744681 0.94736842 0.97826087 0.96774194
|
|
0.94623656 0.95652174 0.96774194 0.98901099]
|
|
|
|
mean value: 0.9646036935453294
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.95744681 0.91836735 0.91836735 0.9 0.95744681 0.9375
|
|
0.89795918 0.91666667 0.9375 0.97826087]
|
|
|
|
mean value: 0.9319515030804181
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97777778 0.95555556 0.95555556 0.94444444 0.97777778 0.96666667
|
|
0.94444444 0.95555556 0.96590909 0.98863636]
|
|
|
|
mean value: 0.9632323232323232
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.95744681 0.91836735 0.91836735 0.9 0.95744681 0.9375
|
|
0.89795918 0.91666667 0.9375 0.97826087]
|
|
|
|
mean value: 0.9319515030804181
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03589606 0.03756332 0.03688979 0.03677058 0.04268074 0.0443778
|
|
0.03895593 0.04160762 0.06652355 0.04450488]
|
|
|
|
mean value: 0.04257702827453613
|
|
|
|
key: score_time
|
|
value: [0.01226711 0.01267052 0.01276422 0.01362777 0.01272678 0.01267147
|
|
0.02288413 0.01285696 0.01315737 0.01290083]
|
|
|
|
mean value: 0.013852715492248535
|
|
|
|
key: test_mcc
|
|
value: [0.97801929 0.97801929 0.87447463 0.97801929 0.97801929 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9786551807569812
|
|
|
|
key: train_mcc
|
|
value: [0.97065374 0.98277854 0.93743687 0.96344676 0.98522086 1.
|
|
0.98523929 1. 0.98768405 1. ]
|
|
|
|
mean value: 0.9812460109419945
|
|
|
|
key: test_accuracy
|
|
value: [0.98888889 0.98888889 0.93333333 0.98888889 0.98888889 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9888888888888889
|
|
|
|
key: train_accuracy
|
|
value: [0.98511166 0.99131514 0.96774194 0.98138958 0.99255583 1.
|
|
0.99256506 1. 0.99380421 1. ]
|
|
|
|
mean value: 0.9904483412817745
|
|
|
|
key: test_fscore
|
|
value: [0.98876404 0.98876404 0.92857143 0.98876404 0.98876404 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9883627608346709
|
|
|
|
key: train_fscore
|
|
value: [0.98488665 0.99123905 0.96666667 0.98103666 0.9925 1.
|
|
0.9925187 1. 0.9937578 1. ]
|
|
|
|
mean value: 0.990260553379279
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.97777778 0.97777778 0.86666667 0.97777778 0.97777778 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9777777777777777
|
|
|
|
key: train_recall
|
|
value: [0.97022333 0.98263027 0.93548387 0.96277916 0.98511166 1.
|
|
0.98514851 1. 0.98759305 1. ]
|
|
|
|
mean value: 0.9808969854801857
|
|
|
|
key: test_roc_auc
|
|
value: [0.98888889 0.98888889 0.93333333 0.98888889 0.98888889 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9888888888888889
|
|
|
|
key: train_roc_auc
|
|
value: [0.98511166 0.99131514 0.96774194 0.98138958 0.99255583 1.
|
|
0.99257426 1. 0.99379653 1. ]
|
|
|
|
mean value: 0.9904484927400928
|
|
|
|
key: test_jcc
|
|
value: [0.97777778 0.97777778 0.86666667 0.97777778 0.97777778 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9777777777777777
|
|
|
|
key: train_jcc
|
|
value: [0.97022333 0.98263027 0.93548387 0.96277916 0.98511166 1.
|
|
0.98514851 1. 0.98759305 1. ]
|
|
|
|
mean value: 0.9808969854801857
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01786661 0.0176003 0.02718854 0.04248023 0.04240894 0.03510284
|
|
0.04219532 0.04208684 0.04209971 0.04229808]
|
|
|
|
mean value: 0.035132741928100585
|
|
|
|
key: score_time
|
|
value: [0.01350927 0.01223803 0.01893806 0.0188787 0.02868772 0.01888013
|
|
0.01891541 0.01884198 0.0188458 0.01894546]
|
|
|
|
mean value: 0.018668055534362793
|
|
|
|
key: test_mcc
|
|
value: [0.73624773 0.85485041 0.81649658 0.89442719 0.89442719 0.80985829
|
|
0.83410221 0.82801395 0.83347626 0.77525253]
|
|
|
|
mean value: 0.8277152340669511
|
|
|
|
key: train_mcc
|
|
value: [0.86220238 0.86575671 0.87030254 0.87030254 0.86349021 0.87985069
|
|
0.87728835 0.87542482 0.87731551 0.86279138]
|
|
|
|
mean value: 0.8704725129113309
|
|
|
|
key: test_accuracy
|
|
value: [0.86666667 0.92222222 0.9 0.94444444 0.94444444 0.9
|
|
0.91011236 0.91011236 0.91011236 0.88764045]
|
|
|
|
mean value: 0.9095755305867665
|
|
|
|
key: train_accuracy
|
|
value: [0.9292804 0.93052109 0.93300248 0.93300248 0.9292804 0.93796526
|
|
0.93680297 0.93556382 0.93680297 0.92936803]
|
|
|
|
mean value: 0.9331589903481018
|
|
|
|
key: test_fscore
|
|
value: [0.87234043 0.92783505 0.90909091 0.94736842 0.94736842 0.90721649
|
|
0.91666667 0.91489362 0.91836735 0.88888889]
|
|
|
|
mean value: 0.9150036242635448
|
|
|
|
key: train_fscore
|
|
value: [0.93238434 0.93396226 0.93617021 0.93617021 0.93286219 0.94075829
|
|
0.93964497 0.93867925 0.93950178 0.93254438]
|
|
|
|
mean value: 0.9362677889726327
|
|
|
|
key: test_precision
|
|
value: [0.83673469 0.86538462 0.83333333 0.9 0.9 0.84615385
|
|
0.84615385 0.86 0.8490566 0.88888889]
|
|
|
|
mean value: /home/tanu/git/LSHTM_analysis/scripts/ml/./gid_cd_sl.py:196: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_cd_sl.py:199: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
0.8625705827565666
|
|
|
|
key: train_precision
|
|
value: [0.89318182 0.88988764 0.89390519 0.89390519 0.88789238 0.90022676
|
|
0.90022676 0.8963964 0.9 0.89140271]
|
|
|
|
mean value: 0.8947024845127801
|
|
|
|
key: test_recall
|
|
value: [0.91111111 1. 1. 1. 1. 0.97777778
|
|
1. 0.97727273 1. 0.88888889]
|
|
|
|
mean value: 0.9755050505050505
|
|
|
|
key: train_recall
|
|
value: [0.9751861 0.98263027 0.98263027 0.98263027 0.98263027 0.98511166
|
|
0.98267327 0.98514851 0.98263027 0.97766749]
|
|
|
|
mean value: 0.9818938407488391
|
|
|
|
key: test_roc_auc
|
|
value: [0.86666667 0.92222222 0.9 0.94444444 0.94444444 0.9
|
|
0.91111111 0.91085859 0.90909091 0.88762626]
|
|
|
|
mean value: 0.9096464646464646
|
|
|
|
key: train_roc_auc
|
|
value: [0.9292804 0.93052109 0.93300248 0.93300248 0.9292804 0.93796526
|
|
0.93674606 0.9355023 0.93685969 0.92942781]
|
|
|
|
mean value: 0.9331587966488957
|
|
|
|
key: test_jcc
|
|
value: [0.77358491 0.86538462 0.83333333 0.9 0.9 0.83018868
|
|
0.84615385 0.84313725 0.8490566 0.8 ]
|
|
|
|
mean value: 0.8440839238453002
|
|
|
|
key: train_jcc
|
|
value: [0.87333333 0.87610619 0.88 0.88 0.87417219 0.88814318
|
|
0.88616071 0.88444444 0.88590604 0.87361419]
|
|
|
|
mean value: 0.8801880279873819
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.15945601 0.31988525 0.23505592 0.32744741 0.35723615 0.316921
|
|
0.18413377 0.25622106 0.34382582 0.25827765]
|
|
|
|
mean value: 0.275846004486084
|
|
|
|
key: score_time
|
|
value: [0.01693988 0.01902175 0.01220441 0.01939917 0.01899529 0.01339293
|
|
0.02362895 0.01896977 0.02248216 0.01224971]
|
|
|
|
mean value: 0.017728400230407716
|
|
|
|
key: test_mcc
|
|
value: [0.78478493 0.85485041 0.83553169 0.87447463 0.91473203 0.80985829
|
|
0.83410221 0.82801395 0.83347626 0.7979798 ]
|
|
|
|
mean value: 0.8367804199822446
|
|
|
|
key: train_mcc
|
|
value: [0.87800481 0.87030254 0.87715387 0.8748657 0.87030254 0.87985069
|
|
0.87728835 0.87542482 0.87731551 0.87424945]
|
|
|
|
mean value: 0.8754758268110596
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 0.92222222 0.91111111 0.93333333 0.95555556 0.9
|
|
0.91011236 0.91011236 0.91011236 0.8988764 ]
|
|
|
|
mean value: 0.9140324594257179
|
|
|
|
key: train_accuracy
|
|
value: [0.93672457 0.93300248 0.93672457 0.93548387 0.93300248 0.93796526
|
|
0.93680297 0.93556382 0.93680297 0.93556382]
|
|
|
|
mean value: 0.9357636806971259
|
|
|
|
key: test_fscore
|
|
value: [0.89583333 0.92783505 0.91836735 0.9375 0.95744681 0.90721649
|
|
0.91666667 0.91489362 0.91836735 0.8988764 ]
|
|
|
|
mean value: 0.91930030702956
|
|
|
|
key: train_fscore
|
|
value: [0.93978749 0.93617021 0.93950178 0.93838863 0.93617021 0.94075829
|
|
0.93964497 0.93867925 0.93950178 0.93809524]
|
|
|
|
mean value: 0.9386697842716545
|
|
|
|
key: test_precision
|
|
value: [0.84313725 0.86538462 0.8490566 0.88235294 0.91836735 0.84615385
|
|
0.84615385 0.86 0.8490566 0.90909091]
|
|
|
|
mean value: 0.8668753967347593
|
|
|
|
key: train_precision
|
|
value: [0.8963964 0.89390519 0.9 0.89795918 0.89390519 0.90022676
|
|
0.90022676 0.8963964 0.9 0.90160183]
|
|
|
|
mean value: 0.8980617705616285
|
|
|
|
key: test_recall
|
|
value: [0.95555556 1. 1. 1. 1. 0.97777778
|
|
1. 0.97727273 1. 0.88888889]
|
|
|
|
mean value: 0.9799494949494949
|
|
|
|
key: train_recall
|
|
value: [0.98759305 0.98263027 0.98263027 0.98263027 0.98263027 0.98511166
|
|
0.98267327 0.98514851 0.98263027 0.97766749]
|
|
|
|
mean value: 0.983134535537921
|
|
|
|
key: test_roc_auc
|
|
value: [0.88888889 0.92222222 0.91111111 0.93333333 0.95555556 0.9
|
|
0.91111111 0.91085859 0.90909091 0.8989899 ]
|
|
|
|
mean value: 0.9141161616161616
|
|
|
|
key: train_roc_auc
|
|
value: [0.93672457 0.93300248 0.93672457 0.93548387 0.93300248 0.93796526
|
|
0.93674606 0.9355023 0.93685969 0.93561593]
|
|
|
|
mean value: 0.9357627201926147
|
|
|
|
key: test_jcc
|
|
value: [0.81132075 0.86538462 0.8490566 0.88235294 0.91836735 0.83018868
|
|
0.84615385 0.84313725 0.8490566 0.81632653]
|
|
|
|
mean value: 0.8511345176677347
|
|
|
|
key: train_jcc
|
|
value: [0.88641425 0.88 0.88590604 0.88392857 0.88 0.88814318
|
|
0.88616071 0.88444444 0.88590604 0.88340807]
|
|
|
|
mean value: 0.8844311313075853
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.83
|