18975 lines
907 KiB
Text
18975 lines
907 KiB
Text
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_7030.py:548: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 424
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 424
|
|
ncols: 265
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 102
|
|
log10_or_mychisq 102
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 166
|
|
No. of categorical features: 7
|
|
|
|
PASS: x_features has no target variable
|
|
|
|
No. of columns for x_features: 173
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data with stratification: 70/30
|
|
Input features data size: (185, 173)
|
|
Train data size: (123, 173)
|
|
Test data size: (62, 173)
|
|
y_train numbers: Counter({1: 76, 0: 47})
|
|
y_train ratio: 0.618421052631579
|
|
|
|
y_test_numbers: Counter({1: 38, 0: 24})
|
|
y_test ratio: 0.631578947368421
|
|
-------------------------------------------------------------
|
|
|
|
index: 0
|
|
ind: 1
|
|
|
|
Mask count check: True
|
|
Original Data
|
|
Counter({1: 76, 0: 47}) Data dim: (123, 173)
|
|
|
|
Simple Random OverSampling
|
|
Counter({0: 76, 1: 76})
|
|
(152, 173)
|
|
|
|
Simple Random UnderSampling
|
|
Counter({0: 47, 1: 47})
|
|
(94, 173)
|
|
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 76, 1: 76})
|
|
(152, 173)
|
|
|
|
SMOTE_NC OverSampling
|
|
Counter({0: 76, 1: 76})
|
|
(152, 173)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis: 70/30 split
|
|
Gene name: pncA
|
|
Drug name: pyrazinamide
|
|
|
|
Output directory: /home/tanu/git/Data/pyrazinamide/output/ml/tts_7030/
|
|
|
|
Sanity checks:
|
|
Total input features: 173
|
|
|
|
Training data size: (123, 173)
|
|
Test data size: (62, 173)
|
|
|
|
Target feature numbers (training data): Counter({1: 76, 0: 47})
|
|
Target features ratio (training data: 0.618421052631579
|
|
|
|
Target feature numbers (test data): Counter({1: 38, 0: 24})
|
|
Target features ratio (test data): 0.631578947368421
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 34
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
These are:
|
|
['ALTS910101', 'AZAE970101', 'AZAE970102', 'BASU010101', 'BENS940101', 'BENS940102', 'BENS940103', 'BENS940104', 'BETM990101', 'BLAJ010101', 'BONM030101', 'BONM030102', 'BONM030103', 'BONM030104', 'BONM030105', 'BONM030106', 'BRYS930101', 'CROG050101', 'CSEM940101', 'DAYM780301', 'DAYM780302', 'DOSZ010101', 'DOSZ010102', 'DOSZ010103', 'DOSZ010104', 'FEND850101', 'FITW660101', 'GEOD900101', 'GIAG010101', 'GONG920101', 'GRAR740104', 'HENS920101', 'HENS920102', 'HENS920103', 'HENS920104', 'JOHM930101', 'JOND920103', 'JOND940101', 'KANM000101', 'KAPO950101', 'KESO980101', 'KESO980102', 'KOLA920101', 'KOLA930101', 'KOSJ950100_RSA_SST', 'KOSJ950100_SST', 'KOSJ950110_RSA', 'KOSJ950115', 'LEVJ860101', 'LINK010101', 'LIWA970101', 'LUTR910101', 'LUTR910102', 'LUTR910103', 'LUTR910104', 'LUTR910105', 'LUTR910106', 'LUTR910107', 'LUTR910108', 'LUTR910109', 'MCLA710101', 'MCLA720101', 'MEHP950102', 'MICC010101', 'MIRL960101', 'MIYS850102', 'MIYS850103', 'MIYS930101', 'MIYS960101', 'MIYS960102', 'MIYS960103', 'MIYS990106', 'MIYS990107', 'MIYT790101', 'MOHR870101', 'MOOG990101', 'MUET010101', 'MUET020101', 'MUET020102', 'NAOD960101', 'NGPC000101', 'NIEK910101', 'NIEK910102', 'OGAK980101', 'OVEJ920100_RSA', 'OVEJ920101', 'OVEJ920102', 'OVEJ920103', 'PRLA000101', 'PRLA000102', 'QUIB020101', 'QU_C930101', 'QU_C930102', 'QU_C930103', 'RIER950101', 'RISJ880101', 'RUSR970101', 'RUSR970102', 'RUSR970103', 'SIMK990101', 'SIMK990102', 'SIMK990103', 'SIMK990104', 'SIMK990105', 'SKOJ000101', 'SKOJ000102', 'SKOJ970101', 'TANS760101', 'TANS760102', 'THOP960101', 'TOBD000101', 'TOBD000102', 'TUDE900101', 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106']
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03451514 0.03091025 0.02606225 0.02828121 0.02817845 0.02744389
|
|
0.02901578 0.02529001 0.02208805 0.03796244]
|
|
|
|
mean value: 0.028974747657775878
|
|
|
|
key: score_time
|
|
value: [0.01222873 0.01185632 0.01173902 0.0115273 0.01153255 0.01144266
|
|
0.01169825 0.01158834 0.01151562 0.01162696]
|
|
|
|
mean value: 0.011675572395324707
|
|
|
|
key: test_mcc
|
|
value: [ 0.15811388 0.69282032 -0.05773503 0.29277002 0.16903085 -0.09759001
|
|
0.52915026 0.40824829 0.25 0.35355339]
|
|
|
|
mean value: 0.26983619879224724
|
|
|
|
key: train_mcc
|
|
value: [0.84761434 0.84516165 0.84761434 0.84833359 0.88561363 0.84646464
|
|
0.84646464 0.84998189 0.8712461 0.90537026]
|
|
|
|
mean value: 0.8593865102328739
|
|
|
|
key: test_accuracy
|
|
value: [0.61538462 0.84615385 0.53846154 0.66666667 0.58333333 0.5
|
|
0.75 0.75 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6583333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.92727273 0.92727273 0.92727273 0.92792793 0.94594595 0.92792793
|
|
0.92792793 0.92792793 0.93693694 0.95495495]
|
|
|
|
mean value: 0.9331367731367731
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.88888889 0.66666667 0.75 0.61538462 0.625
|
|
0.82352941 0.82352941 0.75 0.71428571]
|
|
|
|
mean value: 0.7363167061696474
|
|
|
|
key: train_fscore
|
|
value: [0.94366197 0.94202899 0.94366197 0.94444444 0.95774648 0.94366197
|
|
0.94366197 0.94366197 0.95104895 0.96402878]
|
|
|
|
mean value: 0.9477607496007229
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.8 0.6 0.66666667 0.66666667 0.55555556
|
|
0.7 0.77777778 0.75 0.83333333]
|
|
|
|
mean value: 0.7016666666666667
|
|
|
|
key: train_precision
|
|
value: [0.90540541 0.92857143 0.90540541 0.90666667 0.93150685 0.91780822
|
|
0.91780822 0.90540541 0.90666667 0.94366197]
|
|
|
|
mean value: 0.9168906237623197
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 0.75 0.85714286 0.57142857 0.71428571
|
|
1. 0.875 0.75 0.625 ]
|
|
|
|
mean value: 0.7892857142857143
|
|
|
|
key: train_recall
|
|
value: [0.98529412 0.95588235 0.98529412 0.98550725 0.98550725 0.97101449
|
|
0.97101449 0.98529412 1. 0.98529412]
|
|
|
|
mean value: 0.9810102301790282
|
|
|
|
key: test_roc_auc
|
|
value: [0.575 0.8 0.475 0.62857143 0.58571429 0.45714286
|
|
0.7 0.6875 0.625 0.6875 ]
|
|
|
|
mean value: 0.6221428571428571
|
|
|
|
key: train_roc_auc
|
|
value: [0.90931373 0.91841737 0.90931373 0.90942029 0.93322981 0.91407867
|
|
0.91407867 0.91125171 0.91860465 0.94613543]
|
|
|
|
mean value: 0.9183844063408984
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.8 0.5 0.6 0.44444444 0.45454545
|
|
0.7 0.7 0.6 0.55555556]
|
|
|
|
mean value: 0.59
|
|
|
|
key: train_jcc
|
|
value: [0.89333333 0.89041096 0.89333333 0.89473684 0.91891892 0.89333333
|
|
0.89333333 0.89333333 0.90666667 0.93055556]
|
|
|
|
mean value: 0.9007955608817181
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.76646233 0.66281152 0.64495134 0.77394819 0.82080579 0.66489434
|
|
0.7808938 1.06324553 0.68852258 0.77514148]
|
|
|
|
mean value: 0.7641676902770996
|
|
|
|
key: score_time
|
|
value: [0.01315856 0.01507068 0.01508093 0.01194501 0.01495957 0.01510906
|
|
0.01511002 0.01480508 0.01475024 0.01488853]
|
|
|
|
mean value: 0.014487767219543457
|
|
|
|
key: test_mcc
|
|
value: [0.15811388 0.84327404 0.35 0.83666003 0.47809144 0.07559289
|
|
0.68313005 0.25 0.83666003 0.625 ]
|
|
|
|
mean value: 0.5136522368187714
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.61538462 0.92307692 0.69230769 0.91666667 0.75 0.58333333
|
|
0.83333333 0.66666667 0.91666667 0.83333333]
|
|
|
|
mean value: 0.7730769230769231
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.94117647 0.75 0.93333333 0.8 0.70588235
|
|
0.875 0.75 0.93333333 0.875 ]
|
|
|
|
mean value: 0.8269607843137254
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.88888889 0.75 0.875 0.75 0.6
|
|
0.77777778 0.75 1. 0.875 ]
|
|
|
|
mean value: 0.7933333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 0.75 1. 0.85714286 0.85714286
|
|
1. 0.75 0.875 0.875 ]
|
|
|
|
mean value: 0.8714285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.575 0.9 0.675 0.9 0.72857143 0.52857143
|
|
0.8 0.625 0.9375 0.8125 ]
|
|
|
|
mean value: 0.7482142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.88888889 0.6 0.875 0.66666667 0.54545455
|
|
0.77777778 0.6 0.875 0.77777778]
|
|
|
|
mean value: 0.7152020202020202
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01225948 0.01046491 0.01016545 0.00956416 0.009516 0.00957584
|
|
0.00863767 0.00872874 0.00848627 0.00919747]
|
|
|
|
mean value: 0.009659600257873536
|
|
|
|
key: score_time
|
|
value: [0.01747346 0.00969625 0.00995564 0.00951433 0.00944138 0.00937605
|
|
0.00860405 0.00933146 0.00853682 0.00934362]
|
|
|
|
mean value: 0.01012730598449707
|
|
|
|
key: test_mcc
|
|
value: [ 0.53935989 0.36514837 0.025 0.35675303 0.35675303 0.11952286
|
|
-0.2548236 0. 0.63245553 0.63245553]
|
|
|
|
mean value: 0.27726246589353587
|
|
|
|
key: train_mcc
|
|
value: [0.45108134 0.46581731 0.6090026 0.48768142 0.48694291 0.45589182
|
|
0.48694291 0.46579469 0.41239427 0.39569407]
|
|
|
|
mean value: 0.4717243340308452
|
|
|
|
key: test_accuracy
|
|
value: [0.76923077 0.69230769 0.53846154 0.66666667 0.66666667 0.58333333
|
|
0.5 0.58333333 0.83333333 0.83333333]
|
|
|
|
mean value: 0.6666666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.73636364 0.73636364 0.81818182 0.76576577 0.74774775 0.74774775
|
|
0.74774775 0.74774775 0.72972973 0.7027027 ]
|
|
|
|
mean value: 0.748009828009828
|
|
|
|
key: test_fscore
|
|
value: [0.84210526 0.8 0.625 0.77777778 0.77777778 0.66666667
|
|
0.66666667 0.70588235 0.88888889 0.88888889]
|
|
|
|
mean value: 0.7639654282765738
|
|
|
|
key: train_fscore
|
|
value: [0.82208589 0.82424242 0.8630137 0.82432432 0.8313253 0.825
|
|
0.8313253 0.82278481 0.80519481 0.80473373]
|
|
|
|
mean value: 0.8254030282309115
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.66666667 0.625 0.63636364 0.63636364 0.625
|
|
0.54545455 0.66666667 0.8 0.8 ]
|
|
|
|
mean value: 0.6728787878787879
|
|
|
|
key: train_precision
|
|
value: [0.70526316 0.70103093 0.80769231 0.7721519 0.71134021 0.72527473
|
|
0.71134021 0.72222222 0.72093023 0.67326733]
|
|
|
|
mean value: 0.7250513211315167
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.625 1. 1. 0.71428571
|
|
0.85714286 0.75 1. 1. ]
|
|
|
|
mean value: 0.8946428571428572
|
|
|
|
key: train_recall
|
|
value: [0.98529412 1. 0.92647059 0.88405797 1. 0.95652174
|
|
1. 0.95588235 0.91176471 1. ]
|
|
|
|
mean value: 0.961999147485081
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.6 0.5125 0.6 0.6 0.55714286
|
|
0.42857143 0.5 0.75 0.75 ]
|
|
|
|
mean value: 0.5998214285714285
|
|
|
|
key: train_roc_auc
|
|
value: [0.65931373 0.6547619 0.78466387 0.72774327 0.66666667 0.68064182
|
|
0.66666667 0.6872435 0.67681259 0.61627907]
|
|
|
|
mean value: 0.6820793079618096
|
|
|
|
key: test_jcc
|
|
value: [0.72727273 0.66666667 0.45454545 0.63636364 0.63636364 0.5
|
|
0.5 0.54545455 0.8 0.8 ]
|
|
|
|
mean value: 0.6266666666666667
|
|
|
|
key: train_jcc
|
|
value: [0.69791667 0.70103093 0.75903614 0.70114943 0.71134021 0.70212766
|
|
0.71134021 0.69892473 0.67391304 0.67326733]
|
|
|
|
mean value: 0.703004633770672
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00930357 0.00851154 0.00858212 0.00877905 0.00847721 0.00876355
|
|
0.00862432 0.00842738 0.00890303 0.00864983]
|
|
|
|
mean value: 0.00870215892791748
|
|
|
|
key: score_time
|
|
value: [0.00881672 0.00945139 0.00878429 0.00851703 0.0087235 0.00841832
|
|
0.00863028 0.0087266 0.00858259 0.00869751]
|
|
|
|
mean value: 0.008734822273254395
|
|
|
|
key: test_mcc
|
|
value: [ 0.73192505 -0.05773503 -0.1844662 0.29277002 0.11952286 -0.16903085
|
|
-0.65714286 0.11952286 0. 0. ]
|
|
|
|
mean value: 0.019536586661216014
|
|
|
|
key: train_mcc
|
|
value: [0.40035632 0.40579085 0.46078431 0.45906842 0.42546584 0.45906842
|
|
0.43004541 0.45197925 0.45724863 0.43960479]
|
|
|
|
mean value: 0.43894122364579036
|
|
|
|
key: test_accuracy
|
|
value: [0.84615385 0.53846154 0.46153846 0.66666667 0.58333333 0.41666667
|
|
0.16666667 0.58333333 0.5 0.58333333]
|
|
|
|
mean value: 0.5346153846153846
|
|
|
|
key: train_accuracy
|
|
value: [0.71818182 0.71818182 0.74545455 0.74774775 0.72972973 0.74774775
|
|
0.73873874 0.73873874 0.73873874 0.73873874]
|
|
|
|
mean value: 0.7361998361998362
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.66666667 0.58823529 0.75 0.66666667 0.46153846
|
|
0.16666667 0.66666667 0.57142857 0.70588235]
|
|
|
|
mean value: 0.610089420383538
|
|
|
|
key: train_fscore
|
|
value: [0.77372263 0.77037037 0.79411765 0.8 0.7826087 0.8
|
|
0.8 0.78518519 0.78195489 0.79432624]
|
|
|
|
mean value: 0.7882285654356577
|
|
|
|
key: test_precision
|
|
value: [1. 0.6 0.55555556 0.66666667 0.625 0.5
|
|
0.2 0.71428571 0.66666667 0.66666667]
|
|
|
|
mean value: 0.619484126984127
|
|
|
|
key: train_precision
|
|
value: [0.76811594 0.7761194 0.79411765 0.78873239 0.7826087 0.78873239
|
|
0.76315789 0.79104478 0.8 0.76712329]
|
|
|
|
mean value: 0.781975243498493
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.625 0.85714286 0.71428571 0.42857143
|
|
0.14285714 0.625 0.5 0.75 ]
|
|
|
|
mean value: 0.6142857142857143
|
|
|
|
key: train_recall
|
|
value: [0.77941176 0.76470588 0.79411765 0.8115942 0.7826087 0.8115942
|
|
0.84057971 0.77941176 0.76470588 0.82352941]
|
|
|
|
mean value: 0.7952259164535379
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.475 0.4125 0.62857143 0.55714286 0.41428571
|
|
0.17142857 0.5625 0.5 0.5 ]
|
|
|
|
mean value: 0.5096428571428572
|
|
|
|
key: train_roc_auc
|
|
value: [0.69922969 0.70378151 0.73039216 0.72722567 0.71273292 0.72722567
|
|
0.70600414 0.72691518 0.73119015 0.71409029]
|
|
|
|
mean value: 0.717878738957666
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.5 0.41666667 0.6 0.5 0.3
|
|
0.09090909 0.5 0.4 0.54545455]
|
|
|
|
mean value: 0.4603030303030303
|
|
|
|
key: train_jcc
|
|
value: [0.63095238 0.62650602 0.65853659 0.66666667 0.64285714 0.66666667
|
|
0.66666667 0.64634146 0.64197531 0.65882353]
|
|
|
|
mean value: 0.6505992434740138
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00838876 0.02356815 0.00974345 0.00844836 0.01123309 0.00812244
|
|
0.00860524 0.0086875 0.01128435 0.00895929]
|
|
|
|
mean value: 0.01070406436920166
|
|
|
|
key: score_time
|
|
value: [0.04546356 0.01977587 0.01663089 0.01476264 0.01403522 0.0145638
|
|
0.0156548 0.01584792 0.01040769 0.00951695]
|
|
|
|
mean value: 0.017665934562683106
|
|
|
|
key: test_mcc
|
|
value: [ 0.09759001 -0.33709993 -0.05773503 -0.09759001 -0.37796447 -0.09759001
|
|
-0.2548236 -0.40824829 -0.31622777 0.11952286]
|
|
|
|
mean value: -0.17301662297207385
|
|
|
|
key: train_mcc
|
|
value: [0.15817913 0.2851856 0.23719024 0.19227925 0.1969536 0.31107326
|
|
0.22196178 0.25410227 0.16633663 0.14099385]
|
|
|
|
mean value: 0.2164255608009283
|
|
|
|
key: test_accuracy
|
|
value: [0.53846154 0.46153846 0.53846154 0.5 0.41666667 0.5
|
|
0.5 0.41666667 0.5 0.58333333]
|
|
|
|
mean value: 0.49551282051282053
|
|
|
|
key: train_accuracy
|
|
value: [0.63636364 0.68181818 0.66363636 0.64864865 0.64864865 0.69369369
|
|
0.65765766 0.66666667 0.63063063 0.62162162]
|
|
|
|
mean value: 0.654938574938575
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.63157895 0.66666667 0.625 0.58823529 0.625
|
|
0.66666667 0.58823529 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6296144773698953
|
|
|
|
key: train_fscore
|
|
value: [0.75 0.7826087 0.76433121 0.75471698 0.75159236 0.78481013
|
|
0.75641026 0.76433121 0.73548387 0.73076923]
|
|
|
|
mean value: 0.7575053938583821
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.54545455 0.6 0.55555556 0.5 0.55555556
|
|
0.54545455 0.55555556 0.6 0.71428571]
|
|
|
|
mean value: 0.5838528138528138
|
|
|
|
key: train_precision
|
|
value: [0.65217391 0.67741935 0.6741573 0.66666667 0.67045455 0.69662921
|
|
0.67816092 0.6741573 0.65517241 0.64772727]
|
|
|
|
mean value: 0.6692718906288725
|
|
|
|
key: test_recall
|
|
value: [0.5 0.75 0.75 0.71428571 0.71428571 0.71428571
|
|
0.85714286 0.625 0.75 0.625 ]
|
|
|
|
mean value: 0.7
|
|
|
|
key: train_recall
|
|
value: [0.88235294 0.92647059 0.88235294 0.86956522 0.85507246 0.89855072
|
|
0.85507246 0.88235294 0.83823529 0.83823529]
|
|
|
|
mean value: 0.8728260869565218
|
|
|
|
key: test_roc_auc
|
|
value: [0.55 0.375 0.475 0.45714286 0.35714286 0.45714286
|
|
0.42857143 0.3125 0.375 0.5625 ]
|
|
|
|
mean value: 0.435
|
|
|
|
key: train_roc_auc
|
|
value: [0.56022409 0.60609244 0.59593838 0.57763975 0.58229814 0.62784679
|
|
0.5942029 0.60396717 0.57028044 0.55865253]
|
|
|
|
mean value: 0.5877142616399441
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.46153846 0.5 0.45454545 0.41666667 0.45454545
|
|
0.5 0.41666667 0.5 0.5 ]
|
|
|
|
mean value: 0.4603962703962704
|
|
|
|
key: train_jcc
|
|
value: [0.6 0.64285714 0.6185567 0.60606061 0.60204082 0.64583333
|
|
0.60824742 0.6185567 0.58163265 0.57575758]
|
|
|
|
mean value: 0.6099542952138681
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01100898 0.0107832 0.01118803 0.01089549 0.00967431 0.00996351
|
|
0.00958037 0.00947094 0.00933504 0.01006556]
|
|
|
|
mean value: 0.010196542739868164
|
|
|
|
key: score_time
|
|
value: [0.00916982 0.00935102 0.00953722 0.01004863 0.00892758 0.00883555
|
|
0.00861263 0.00887918 0.00933504 0.00899792]
|
|
|
|
mean value: 0.009169459342956543
|
|
|
|
key: test_mcc
|
|
value: [ 0.53935989 0.36514837 0.36514837 0.35675303 -0.2548236 0.35675303
|
|
0. -0.21320072 0. 0. ]
|
|
|
|
mean value: 0.15151383892490677
|
|
|
|
key: train_mcc
|
|
value: [0.46581731 0.54401741 0.56282341 0.48694291 0.50665218 0.58248237
|
|
0.50665218 0.60744732 0.55324183 0.43781764]
|
|
|
|
mean value: 0.5253894549869416
|
|
|
|
key: test_accuracy
|
|
value: [0.76923077 0.69230769 0.69230769 0.66666667 0.5 0.66666667
|
|
0.58333333 0.58333333 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6487179487179487
|
|
|
|
key: train_accuracy
|
|
value: [0.73636364 0.77272727 0.78181818 0.74774775 0.75675676 0.79279279
|
|
0.75675676 0.8018018 0.77477477 0.72072072]
|
|
|
|
mean value: 0.7642260442260442
|
|
|
|
key: test_fscore
|
|
value: [0.84210526 0.8 0.8 0.77777778 0.66666667 0.77777778
|
|
0.73684211 0.73684211 0.8 0.8 ]
|
|
|
|
mean value: 0.7738011695906433
|
|
|
|
key: train_fscore
|
|
value: [0.82424242 0.8447205 0.85 0.8313253 0.83636364 0.85714286
|
|
0.83636364 0.86075949 0.8447205 0.81437126]
|
|
|
|
mean value: 0.8400009600262109
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.66666667 0.66666667 0.63636364 0.54545455 0.63636364
|
|
0.58333333 0.63636364 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6431818181818182
|
|
|
|
key: train_precision
|
|
value: [0.70103093 0.7311828 0.73913043 0.71134021 0.71875 0.75
|
|
0.71875 0.75555556 0.7311828 0.68686869]
|
|
|
|
mean value: 0.7243791402625319
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.85714286 1.
|
|
1. 0.875 1. 1. ]
|
|
|
|
mean value: 0.9732142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.7 0.6 0.6 0.6 0.42857143 0.6
|
|
0.5 0.4375 0.5 0.5 ]
|
|
|
|
mean value: 0.5466071428571428
|
|
|
|
key: train_roc_auc
|
|
value: [0.6547619 0.70238095 0.71428571 0.66666667 0.67857143 0.72619048
|
|
0.67857143 0.74418605 0.70930233 0.63953488]
|
|
|
|
mean value: 0.6914451827242525
|
|
|
|
key: test_jcc
|
|
value: [0.72727273 0.66666667 0.66666667 0.63636364 0.5 0.63636364
|
|
0.58333333 0.58333333 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6333333333333333
|
|
|
|
key: train_jcc
|
|
value: [0.70103093 0.7311828 0.73913043 0.71134021 0.71875 0.75
|
|
0.71875 0.75555556 0.7311828 0.68686869]
|
|
|
|
mean value: 0.7243791402625319
|
|
|
|
MCC on Blind test: 0.2
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.5538888 0.47991705 0.47801304 0.61600709 0.49233127 0.48040891
|
|
0.48740292 0.54901862 0.52566528 0.56194234]
|
|
|
|
mean value: 0.5224595308303833
|
|
|
|
key: score_time
|
|
value: [0.01236391 0.01229692 0.01237655 0.01229978 0.01245737 0.01221967
|
|
0.01235771 0.01267815 0.01228714 0.01299524]
|
|
|
|
mean value: 0.012433242797851563
|
|
|
|
key: test_mcc
|
|
value: [ 0.15811388 0.31754265 0.15811388 0.52915026 -0.37142857 0.11952286
|
|
0.29277002 0.40824829 0.35355339 0.47809144]
|
|
|
|
mean value: 0.24436781124643725
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.61538462 0.69230769 0.61538462 0.75 0.33333333 0.58333333
|
|
0.66666667 0.75 0.66666667 0.75 ]
|
|
|
|
mean value: 0.6423076923076924
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.77777778 0.70588235 0.82352941 0.42857143 0.66666667
|
|
0.75 0.82352941 0.71428571 0.8 ]
|
|
|
|
mean value: 0.7196125116713352
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.7 0.66666667 0.7 0.42857143 0.625
|
|
0.66666667 0.77777778 0.83333333 0.85714286]
|
|
|
|
mean value: 0.6921825396825396
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.875 0.75 1. 0.42857143 0.71428571
|
|
0.85714286 0.875 0.625 0.75 ]
|
|
|
|
mean value: 0.7625
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.575 0.6375 0.575 0.7 0.31428571 0.55714286
|
|
0.62857143 0.6875 0.6875 0.75 ]
|
|
|
|
mean value: 0.61125
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.63636364 0.54545455 0.7 0.27272727 0.5
|
|
0.6 0.7 0.55555556 0.66666667]
|
|
|
|
mean value: 0.5722222222222222
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.44
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0153141 0.0142355 0.01135921 0.01111603 0.01088643 0.0111146
|
|
0.01125288 0.01121092 0.01074886 0.01101923]
|
|
|
|
mean value: 0.011825776100158692
|
|
|
|
key: score_time
|
|
value: [0.01174498 0.00923395 0.00900865 0.00852227 0.00854182 0.0086956
|
|
0.00855017 0.00860357 0.00844765 0.00856256]
|
|
|
|
mean value: 0.008991122245788574
|
|
|
|
key: test_mcc
|
|
value: [0.85391256 0.675 0.85391256 0.84515425 0.65714286 0.83666003
|
|
0.65714286 0.47809144 1. 0.83666003]
|
|
|
|
mean value: 0.7693676593476073
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92307692 0.84615385 0.92307692 0.91666667 0.83333333 0.91666667
|
|
0.83333333 0.75 1. 0.91666667]
|
|
|
|
mean value: 0.8858974358974359
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.875 0.93333333 0.92307692 0.85714286 0.93333333
|
|
0.85714286 0.8 1. 0.93333333]
|
|
|
|
mean value: 0.904569597069597
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 1. 1. 0.85714286 0.875
|
|
0.85714286 0.85714286 1. 1. ]
|
|
|
|
mean value: 0.9321428571428572
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 0.875 0.85714286 0.85714286 1.
|
|
0.85714286 0.75 1. 0.875 ]
|
|
|
|
mean value: 0.8821428571428571
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.8375 0.9375 0.92857143 0.82857143 0.9
|
|
0.82857143 0.75 1. 0.9375 ]
|
|
|
|
mean value: 0.8885714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.77777778 0.875 0.85714286 0.75 0.875
|
|
0.75 0.66666667 1. 0.875 ]
|
|
|
|
mean value: 0.8301587301587301
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.76
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08682561 0.08593559 0.08586502 0.08684969 0.08592057 0.08690381
|
|
0.08726907 0.08530927 0.08518577 0.08504343]
|
|
|
|
mean value: 0.08611078262329101
|
|
|
|
key: score_time
|
|
value: [0.01694298 0.01753497 0.01696634 0.01706958 0.01736045 0.01758957
|
|
0.01705503 0.01677322 0.0170815 0.01679707]
|
|
|
|
mean value: 0.017117071151733398
|
|
|
|
key: test_mcc
|
|
value: [ 0.31754265 0.35 0.35 0.07559289 -0.23904572 0.11952286
|
|
0.07559289 0.47809144 0.40824829 0.47809144]
|
|
|
|
mean value: 0.24136367542559237
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.69230769 0.69230769 0.69230769 0.58333333 0.41666667 0.58333333
|
|
0.58333333 0.75 0.75 0.75 ]
|
|
|
|
mean value: 0.6493589743589744
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.75 0.75 0.70588235 0.53333333 0.66666667
|
|
0.70588235 0.8 0.82352941 0.8 ]
|
|
|
|
mean value: 0.7313071895424836
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.7 0.75 0.75 0.6 0.5 0.625
|
|
0.6 0.85714286 0.77777778 0.85714286]
|
|
|
|
mean value: 0.7017063492063492
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.75 0.75 0.85714286 0.57142857 0.71428571
|
|
0.85714286 0.75 0.875 0.75 ]
|
|
|
|
mean value: 0.775
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.6375 0.675 0.675 0.52857143 0.38571429 0.55714286
|
|
0.52857143 0.75 0.6875 0.75 ]
|
|
|
|
mean value: 0.6175
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.6 0.6 0.54545455 0.36363636 0.5
|
|
0.54545455 0.66666667 0.7 0.66666667]
|
|
|
|
mean value: 0.5824242424242424
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00862598 0.00868583 0.00858784 0.00867367 0.00882292 0.00863862
|
|
0.00866914 0.00875616 0.0087266 0.00873733]
|
|
|
|
mean value: 0.008692407608032226
|
|
|
|
key: score_time
|
|
value: [0.00852489 0.00864029 0.00840664 0.00853038 0.0089252 0.0089848
|
|
0.00855589 0.00857258 0.00880885 0.00852013]
|
|
|
|
mean value: 0.008646965026855469
|
|
|
|
key: test_mcc
|
|
value: [-0.025 -0.4330127 -0.025 0.07559289 -0.48795004 0.11952286
|
|
0.31428571 0.35355339 -0.5 0. ]
|
|
|
|
mean value: -0.0608007877952213
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.46153846 0.38461538 0.46153846 0.58333333 0.33333333 0.58333333
|
|
0.66666667 0.66666667 0.33333333 0.5 ]
|
|
|
|
mean value: 0.49743589743589745
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.46153846 0.55555556 0.46153846 0.70588235 0.5 0.66666667
|
|
0.71428571 0.71428571 0.5 0.57142857]
|
|
|
|
mean value: 0.5851181498240322
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.6 0.5 0.6 0.6 0.44444444 0.625
|
|
0.71428571 0.83333333 0.5 0.66666667]
|
|
|
|
mean value: 0.6083730158730158
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.375 0.625 0.375 0.85714286 0.57142857 0.71428571
|
|
0.71428571 0.625 0.5 0.5 ]
|
|
|
|
mean value: 0.5857142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.4875 0.3125 0.4875 0.52857143 0.28571429 0.55714286
|
|
0.65714286 0.6875 0.25 0.5 ]
|
|
|
|
mean value: 0.47535714285714287
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.3 0.38461538 0.3 0.54545455 0.33333333 0.5
|
|
0.55555556 0.55555556 0.33333333 0.4 ]
|
|
|
|
mean value: 0.4207847707847708
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.10459781 1.22312737 1.14753699 1.15912485 1.12197685 1.17388844
|
|
1.14407277 1.08973622 1.10582209 1.1975131 ]
|
|
|
|
mean value: 1.146739649772644
|
|
|
|
key: score_time
|
|
value: [0.08704519 0.09567928 0.09131026 0.1528132 0.09443045 0.0932653
|
|
0.0874753 0.08719516 0.08769536 0.09550047]
|
|
|
|
mean value: 0.0972409963607788
|
|
|
|
key: test_mcc
|
|
value: [0.50069396 0.35 0.35 0.68313005 0.31428571 0.29277002
|
|
0.29277002 0.47809144 0.40824829 0.40824829]
|
|
|
|
mean value: 0.40782377966402844
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.76923077 0.69230769 0.69230769 0.83333333 0.66666667 0.66666667
|
|
0.66666667 0.75 0.75 0.75 ]
|
|
|
|
mean value: 0.7237179487179487
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.75 0.75 0.875 0.71428571 0.75
|
|
0.75 0.8 0.82352941 0.82352941]
|
|
|
|
mean value: 0.7859873949579832
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.75 0.75 0.77777778 0.71428571 0.66666667
|
|
0.66666667 0.85714286 0.77777778 0.77777778]
|
|
|
|
mean value: 0.7515873015873016
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.75 0.75 1. 0.71428571 0.85714286
|
|
0.85714286 0.75 0.875 0.875 ]
|
|
|
|
mean value: 0.8303571428571428
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.7375 0.675 0.675 0.8 0.65714286 0.62857143
|
|
0.62857143 0.75 0.6875 0.6875 ]
|
|
|
|
mean value: 0.6926785714285715
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.6 0.6 0.77777778 0.55555556 0.6
|
|
0.6 0.66666667 0.7 0.7 ]
|
|
|
|
mean value: 0.65
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
key: fit_time
|
|
value: [1.71212268 0.90729356 0.87112045 0.86570859 0.95829868 0.85080886
|
|
0.86282325 0.88417912 0.91506934 0.90223193]
|
|
|
|
mean value: 0.9729656457901001
|
|
|
|
key: score_time
|
|
value: [0.21961141 0.15164089 0.17765403 0.21611881 0.2192595 0.21657538
|
|
0.20674014 0.20584822 0.17703295 0.24271965]
|
|
|
|
mean value: 0.20332009792327882
|
|
|
|
key: test_mcc
|
|
value: [0.84327404 0.31754265 0.15811388 0.52915026 0.47809144 0.52915026
|
|
0.07559289 0.625 0.15811388 0.625 ]
|
|
|
|
mean value: 0.43390293195441393
|
|
|
|
key: train_mcc
|
|
value: [0.84761434 0.8662719 0.86922699 0.86692145 0.86692145 0.86692145
|
|
0.84833359 0.88678314 0.90537026 0.88678314]
|
|
|
|
mean value: 0.8711147713763046
|
|
|
|
key: test_accuracy
|
|
value: [0.92307692 0.69230769 0.61538462 0.75 0.75 0.75
|
|
0.58333333 0.83333333 0.66666667 0.83333333]
|
|
|
|
mean value: 0.7397435897435898
|
|
|
|
key: train_accuracy
|
|
value: [0.92727273 0.93636364 0.93636364 0.93693694 0.93693694 0.93693694
|
|
0.92792793 0.94594595 0.95495495 0.94594595]
|
|
|
|
mean value: 0.9385585585585585
|
|
|
|
key: test_fscore
|
|
value: [0.94117647 0.77777778 0.70588235 0.82352941 0.8 0.82352941
|
|
0.70588235 0.875 0.77777778 0.875 ]
|
|
|
|
mean value: 0.8105555555555555
|
|
|
|
key: train_fscore
|
|
value: [0.94366197 0.95035461 0.95104895 0.95104895 0.95104895 0.95104895
|
|
0.94444444 0.95714286 0.96402878 0.95714286]
|
|
|
|
mean value: 0.9520971321664444
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.7 0.66666667 0.7 0.75 0.7
|
|
0.6 0.875 0.7 0.875 ]
|
|
|
|
mean value: 0.7455555555555555
|
|
|
|
key: train_precision
|
|
value: [0.90540541 0.91780822 0.90666667 0.91891892 0.91891892 0.91891892
|
|
0.90666667 0.93055556 0.94366197 0.93055556]
|
|
|
|
mean value: 0.9198076797615675
|
|
|
|
key: test_recall
|
|
value: [1. 0.875 0.75 1. 0.85714286 1.
|
|
0.85714286 0.875 0.875 0.875 ]
|
|
|
|
mean value: 0.8964285714285715
|
|
|
|
key: train_recall
|
|
value: [0.98529412 0.98529412 1. 0.98550725 0.98550725 0.98550725
|
|
0.98550725 0.98529412 0.98529412 0.98529412]
|
|
|
|
mean value: 0.9868499573742541
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.6375 0.575 0.7 0.72857143 0.7
|
|
0.52857143 0.8125 0.5625 0.8125 ]
|
|
|
|
mean value: 0.6957142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.90931373 0.92121849 0.91666667 0.92132505 0.92132505 0.92132505
|
|
0.90942029 0.93450752 0.94613543 0.93450752]
|
|
|
|
mean value: 0.9235744803482566
|
|
|
|
key: test_jcc
|
|
value: [0.88888889 0.63636364 0.54545455 0.7 0.66666667 0.7
|
|
0.54545455 0.77777778 0.63636364 0.77777778]
|
|
|
|
mean value: 0.6874747474747475
|
|
|
|
key: train_jcc
|
|
value: [0.89333333 0.90540541 0.90666667 0.90666667 0.90666667 0.90666667
|
|
0.89473684 0.91780822 0.93055556 0.91780822]
|
|
|
|
mean value: 0.9086314241422389
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0094707 0.00932527 0.00876498 0.00853801 0.00867772 0.00861979
|
|
0.00886083 0.00872159 0.00855374 0.00877261]
|
|
|
|
mean value: 0.008830523490905762
|
|
|
|
key: score_time
|
|
value: [0.02030921 0.00867224 0.00857282 0.00857878 0.00850296 0.00849128
|
|
0.00858665 0.00845098 0.00853539 0.00862217]
|
|
|
|
mean value: 0.009732246398925781
|
|
|
|
key: test_mcc
|
|
value: [ 0.73192505 -0.05773503 -0.1844662 0.29277002 0.11952286 -0.16903085
|
|
-0.65714286 0.11952286 0. 0. ]
|
|
|
|
mean value: 0.019536586661216014
|
|
|
|
key: train_mcc
|
|
value: [0.40035632 0.40579085 0.46078431 0.45906842 0.42546584 0.45906842
|
|
0.43004541 0.45197925 0.45724863 0.43960479]
|
|
|
|
mean value: 0.43894122364579036
|
|
|
|
key: test_accuracy
|
|
value: [0.84615385 0.53846154 0.46153846 0.66666667 0.58333333 0.41666667
|
|
0.16666667 0.58333333 0.5 0.58333333]
|
|
|
|
mean value: 0.5346153846153846
|
|
|
|
key: train_accuracy
|
|
value: [0.71818182 0.71818182 0.74545455 0.74774775 0.72972973 0.74774775
|
|
0.73873874 0.73873874 0.73873874 0.73873874]
|
|
|
|
mean value: 0.7361998361998362
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.66666667 0.58823529 0.75 0.66666667 0.46153846
|
|
0.16666667 0.66666667 0.57142857 0.70588235]
|
|
|
|
mean value: 0.610089420383538
|
|
|
|
key: train_fscore
|
|
value: [0.77372263 0.77037037 0.79411765 0.8 0.7826087 0.8
|
|
0.8 0.78518519 0.78195489 0.79432624]
|
|
|
|
mean value: 0.7882285654356577
|
|
|
|
key: test_precision
|
|
value: [1. 0.6 0.55555556 0.66666667 0.625 0.5
|
|
0.2 0.71428571 0.66666667 0.66666667]
|
|
|
|
mean value: 0.619484126984127
|
|
|
|
key: train_precision
|
|
value: [0.76811594 0.7761194 0.79411765 0.78873239 0.7826087 0.78873239
|
|
0.76315789 0.79104478 0.8 0.76712329]
|
|
|
|
mean value: 0.781975243498493
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.625 0.85714286 0.71428571 0.42857143
|
|
0.14285714 0.625 0.5 0.75 ]
|
|
|
|
mean value: 0.6142857142857143
|
|
|
|
key: train_recall
|
|
value: [0.77941176 0.76470588 0.79411765 0.8115942 0.7826087 0.8115942
|
|
0.84057971 0.77941176 0.76470588 0.82352941]
|
|
|
|
mean value: 0.7952259164535379
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.475 0.4125 0.62857143 0.55714286 0.41428571
|
|
0.17142857 0.5625 0.5 0.5 ]
|
|
|
|
mean value: 0.5096428571428572
|
|
|
|
key: train_roc_auc
|
|
value: [0.69922969 0.70378151 0.73039216 0.72722567 0.71273292 0.72722567
|
|
0.70600414 0.72691518 0.73119015 0.71409029]
|
|
|
|
mean value: 0.717878738957666
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.5 0.41666667 0.6 0.5 0.3
|
|
0.09090909 0.5 0.4 0.54545455]
|
|
|
|
mean value: 0.4603030303030303
|
|
|
|
key: train_jcc
|
|
value: [0.63095238 0.62650602 0.65853659 0.66666667 0.64285714 0.66666667
|
|
0.66666667 0.64634146 0.64197531 0.65882353]
|
|
|
|
mean value: 0.6505992434740138
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.30405092 0.04107499 0.04473925 0.05178094 0.08746314 0.04149055
|
|
0.06101847 0.04490185 0.04554677 0.04562163]
|
|
|
|
mean value: 0.07676885128021241
|
|
|
|
key: score_time
|
|
value: [0.01090312 0.01123238 0.01081657 0.01051259 0.01217699 0.01038527
|
|
0.01019979 0.01016188 0.01048923 0.0102849 ]
|
|
|
|
mean value: 0.01071627140045166
|
|
|
|
key: test_mcc
|
|
value: [1. 0.50069396 0.53674504 0.84515425 0.65714286 0.83666003
|
|
0.83666003 0.625 0.625 0.81649658]
|
|
|
|
mean value: 0.7279552748848938
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.76923077 0.76923077 0.91666667 0.83333333 0.91666667
|
|
0.91666667 0.83333333 0.83333333 0.91666667]
|
|
|
|
mean value: 0.8705128205128205
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.82352941 0.8 0.92307692 0.85714286 0.93333333
|
|
0.93333333 0.875 0.875 0.94117647]
|
|
|
|
mean value: 0.8961592329239388
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.77777778 0.85714286 1. 0.85714286 0.875
|
|
0.875 0.875 0.875 0.88888889]
|
|
|
|
mean value: 0.888095238095238
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.875 0.75 0.85714286 0.85714286 1.
|
|
1. 0.875 0.875 1. ]
|
|
|
|
mean value: 0.9089285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.7375 0.775 0.92857143 0.82857143 0.9
|
|
0.9 0.8125 0.8125 0.875 ]
|
|
|
|
mean value: 0.8569642857142857
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.7 0.66666667 0.85714286 0.75 0.875
|
|
0.875 0.77777778 0.77777778 0.88888889]
|
|
|
|
mean value: 0.8168253968253968
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.83
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02990842 0.02701712 0.04433227 0.05773687 0.04427814 0.04527235
|
|
0.03927279 0.04975677 0.06447816 0.04077959]
|
|
|
|
mean value: 0.04428324699401855
|
|
|
|
key: score_time
|
|
value: [0.01164579 0.02096772 0.02248883 0.02260923 0.0201776 0.03288007
|
|
0.02291846 0.02261591 0.02087259 0.02563477]
|
|
|
|
mean value: 0.02228109836578369
|
|
|
|
key: test_mcc
|
|
value: [ 0.025 0.35 0.41475753 0.02857143 -0.84515425 0.23904572
|
|
0.07559289 0.83666003 0.35355339 0.25 ]
|
|
|
|
mean value: 0.17280267384421122
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.53846154 0.69230769 0.69230769 0.5 0.08333333 0.58333333
|
|
0.58333333 0.91666667 0.66666667 0.66666667]
|
|
|
|
mean value: 0.5923076923076923
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.75 0.71428571 0.5 0.15384615 0.54545455
|
|
0.70588235 0.93333333 0.71428571 0.75 ]
|
|
|
|
mean value: 0.6392087814146638
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.625 0.75 0.83333333 0.6 0.16666667 0.75
|
|
0.6 1. 0.83333333 0.75 ]
|
|
|
|
mean value: 0.6908333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.625 0.75 0.625 0.42857143 0.14285714 0.42857143
|
|
0.85714286 0.875 0.625 0.75 ]
|
|
|
|
mean value: 0.6107142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5125 0.675 0.7125 0.51428571 0.07142857 0.61428571
|
|
0.52857143 0.9375 0.6875 0.625 ]
|
|
|
|
mean value: 0.5878571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.6 0.55555556 0.33333333 0.08333333 0.375
|
|
0.54545455 0.875 0.55555556 0.6 ]
|
|
|
|
mean value: 0.49777777777777776
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02160525 0.00896573 0.00845146 0.00846839 0.00850272 0.00860405
|
|
0.00858736 0.00884891 0.0085218 0.00864959]
|
|
|
|
mean value: 0.009920525550842284
|
|
|
|
key: score_time
|
|
value: [0.00903749 0.00864482 0.00847673 0.00845194 0.00846624 0.00854897
|
|
0.00866199 0.00849605 0.00855637 0.00866365]
|
|
|
|
mean value: 0.008600425720214844
|
|
|
|
key: test_mcc
|
|
value: [ 0.50069396 0.50069396 -0.1844662 0.35675303 0.11952286 0.11952286
|
|
-0.2548236 0.15811388 0.40824829 0.40824829]
|
|
|
|
mean value: 0.213250735296738
|
|
|
|
key: train_mcc
|
|
value: [0.44420485 0.46407039 0.4227351 0.38383126 0.4252358 0.46792623
|
|
0.44667184 0.35125794 0.39208478 0.36812371]
|
|
|
|
mean value: 0.41661419060358923
|
|
|
|
key: test_accuracy
|
|
value: [0.76923077 0.76923077 0.46153846 0.66666667 0.58333333 0.58333333
|
|
0.5 0.66666667 0.75 0.75 ]
|
|
|
|
mean value: 0.65
|
|
|
|
key: train_accuracy
|
|
value: [0.74545455 0.75454545 0.73636364 0.72072072 0.73873874 0.75675676
|
|
0.74774775 0.7027027 0.72072072 0.71171171]
|
|
|
|
mean value: 0.7335462735462736
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.82352941 0.58823529 0.77777778 0.66666667 0.66666667
|
|
0.66666667 0.77777778 0.82352941 0.82352941]
|
|
|
|
mean value: 0.7437908496732026
|
|
|
|
key: train_fscore
|
|
value: [0.80821918 0.81879195 0.80272109 0.79194631 0.80536913 0.81632653
|
|
0.81081081 0.7755102 0.78911565 0.79220779]
|
|
|
|
mean value: 0.8011018633038886
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.77777778 0.55555556 0.63636364 0.625 0.625
|
|
0.54545455 0.7 0.77777778 0.77777778]
|
|
|
|
mean value: 0.6798484848484848
|
|
|
|
key: train_precision
|
|
value: [0.75641026 0.75308642 0.74683544 0.7375 0.75 0.76923077
|
|
0.75949367 0.72151899 0.73417722 0.70930233]
|
|
|
|
mean value: 0.7437555087431204
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 0.625 1. 0.71428571 0.71428571
|
|
0.85714286 0.875 0.875 0.875 ]
|
|
|
|
mean value: 0.8285714285714285
|
|
|
|
key: train_recall
|
|
value: [0.86764706 0.89705882 0.86764706 0.85507246 0.86956522 0.86956522
|
|
0.86956522 0.83823529 0.85294118 0.89705882]
|
|
|
|
mean value: 0.8684356351236147
|
|
|
|
key: test_roc_auc
|
|
value: [0.7375 0.7375 0.4125 0.6 0.55714286 0.55714286
|
|
0.42857143 0.5625 0.6875 0.6875 ]
|
|
|
|
mean value: 0.5967857142857143
|
|
|
|
key: train_roc_auc
|
|
value: [0.70763305 0.71043417 0.69572829 0.67753623 0.69668737 0.72049689
|
|
0.70859213 0.66330369 0.68228454 0.65783174]
|
|
|
|
mean value: 0.6920528120247088
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.7 0.41666667 0.63636364 0.5 0.5
|
|
0.5 0.63636364 0.7 0.7 ]
|
|
|
|
mean value: 0.5989393939393939
|
|
|
|
key: train_jcc
|
|
value: [0.67816092 0.69318182 0.67045455 0.65555556 0.6741573 0.68965517
|
|
0.68181818 0.63333333 0.65168539 0.65591398]
|
|
|
|
mean value: 0.6683916201421294
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01095843 0.01384926 0.01324844 0.01242733 0.01249099 0.01315689
|
|
0.01342034 0.01367736 0.01398945 0.01427722]
|
|
|
|
mean value: 0.013149571418762208
|
|
|
|
key: score_time
|
|
value: [0.00927925 0.01075411 0.01079679 0.01129508 0.01130104 0.01150632
|
|
0.01130939 0.0113399 0.01137495 0.01127005]
|
|
|
|
mean value: 0.011022686958312988
|
|
|
|
key: test_mcc
|
|
value: [ 0.53674504 0.675 -0.05773503 0.23904572 0.07559289 -0.02857143
|
|
0.5976143 0. 0.5 0.35355339]
|
|
|
|
mean value: 0.2891244896360497
|
|
|
|
key: train_mcc
|
|
value: [0.8647415 0.75782808 0.83299313 0.60869565 0.79305807 0.79763243
|
|
0.60869565 0.88578234 0.87833398 0.94346837]
|
|
|
|
mean value: 0.7971229225524747
|
|
|
|
key: test_accuracy
|
|
value: [0.76923077 0.84615385 0.53846154 0.58333333 0.58333333 0.5
|
|
0.75 0.58333333 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6487179487179487
|
|
|
|
key: train_accuracy
|
|
value: [0.93636364 0.86363636 0.91818182 0.75675676 0.9009009 0.9009009
|
|
0.75675676 0.94594595 0.93693694 0.97297297]
|
|
|
|
mean value: 0.888935298935299
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.875 0.66666667 0.54545455 0.70588235 0.57142857
|
|
0.72727273 0.70588235 0.66666667 0.71428571]
|
|
|
|
mean value: 0.6978539597657244
|
|
|
|
key: train_fscore
|
|
value: [0.94890511 0.87603306 0.93793103 0.75675676 0.92517007 0.91729323
|
|
0.75675676 0.95652174 0.94573643 0.97777778]
|
|
|
|
mean value: 0.899888196746322
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.875 0.6 0.75 0.6 0.57142857
|
|
1. 0.66666667 1. 0.83333333]
|
|
|
|
mean value: 0.7753571428571429
|
|
|
|
key: train_precision
|
|
value: [0.94202899 1. 0.88311688 1. 0.87179487 0.953125
|
|
1. 0.94285714 1. 0.98507463]
|
|
|
|
mean value: 0.9577997510141816
|
|
|
|
key: test_recall
|
|
value: [0.75 0.875 0.75 0.42857143 0.85714286 0.57142857
|
|
0.57142857 0.75 0.5 0.625 ]
|
|
|
|
mean value: 0.6678571428571428
|
|
|
|
key: train_recall
|
|
value: [0.95588235 0.77941176 1. 0.60869565 0.98550725 0.88405797
|
|
0.60869565 0.97058824 0.89705882 0.97058824]
|
|
|
|
mean value: 0.8660485933503836
|
|
|
|
key: test_roc_auc
|
|
value: [0.775 0.8375 0.475 0.61428571 0.52857143 0.48571429
|
|
0.78571429 0.5 0.75 0.6875 ]
|
|
|
|
mean value: 0.6439285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.93032213 0.88970588 0.89285714 0.80434783 0.873706 0.9063147
|
|
0.80434783 0.93878249 0.94852941 0.97366621]
|
|
|
|
mean value: 0.8962579622344388
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.77777778 0.5 0.375 0.54545455 0.4
|
|
0.57142857 0.54545455 0.5 0.55555556]
|
|
|
|
mean value: 0.5437337662337662
|
|
|
|
key: train_jcc
|
|
value: [0.90277778 0.77941176 0.88311688 0.60869565 0.86075949 0.84722222
|
|
0.60869565 0.91666667 0.89705882 0.95652174]
|
|
|
|
mean value: 0.8260926675167991
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01353407 0.01291561 0.01333976 0.01271963 0.01242924 0.0133822
|
|
0.01277709 0.01301098 0.01293945 0.01321888]
|
|
|
|
mean value: 0.013026690483093262
|
|
|
|
key: score_time
|
|
value: [0.01141334 0.01138353 0.01132226 0.01126695 0.01123738 0.01134562
|
|
0.0113399 0.01128531 0.01133299 0.01133704]
|
|
|
|
mean value: 0.011326432228088379
|
|
|
|
key: test_mcc
|
|
value: [0.53674504 0.69282032 0.21957752 0.50709255 0.02857143 0.
|
|
0. 0.70710678 0.5976143 0.35355339]
|
|
|
|
mean value: 0.3643081337418221
|
|
|
|
key: train_mcc
|
|
value: [0.92700326 0.88445378 0.84530046 0.75659284 0.65633012 0.52601495
|
|
0.40331021 0.71409851 0.90773323 0.96304256]
|
|
|
|
mean value: 0.7583879927847035
|
|
|
|
key: test_accuracy
|
|
value: [0.76923077 0.84615385 0.61538462 0.75 0.5 0.58333333
|
|
0.58333333 0.83333333 0.75 0.66666667]
|
|
|
|
mean value: 0.6897435897435897
|
|
|
|
key: train_accuracy
|
|
value: [0.96363636 0.94545455 0.91818182 0.87387387 0.79279279 0.76576577
|
|
0.71171171 0.84684685 0.95495495 0.98198198]
|
|
|
|
mean value: 0.8755200655200654
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.88888889 0.66666667 0.76923077 0.5 0.73684211
|
|
0.73684211 0.85714286 0.76923077 0.71428571]
|
|
|
|
mean value: 0.7439129875971981
|
|
|
|
key: train_fscore
|
|
value: [0.96969697 0.95588235 0.92913386 0.890625 0.8 0.84146341
|
|
0.81176471 0.86178862 0.96240602 0.98507463]
|
|
|
|
mean value: 0.9007835561211807
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.8 0.71428571 0.83333333 0.6 0.58333333
|
|
0.58333333 1. 1. 0.83333333]
|
|
|
|
mean value: 0.7804761904761905
|
|
|
|
key: train_precision
|
|
value: [1. 0.95588235 1. 0.96610169 1. 0.72631579
|
|
0.68316832 0.96363636 0.98461538 1. ]
|
|
|
|
mean value: 0.9279719902413547
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 0.625 0.71428571 0.42857143 1.
|
|
1. 0.75 0.625 0.625 ]
|
|
|
|
mean value: 0.7517857142857143
|
|
|
|
key: train_recall
|
|
value: [0.94117647 0.95588235 0.86764706 0.82608696 0.66666667 1.
|
|
1. 0.77941176 0.94117647 0.97058824]
|
|
|
|
mean value: 0.8948635976129582
|
|
|
|
key: test_roc_auc
|
|
value: [0.775 0.8 0.6125 0.75714286 0.51428571 0.5
|
|
0.5 0.875 0.8125 0.6875 ]
|
|
|
|
mean value: 0.6833928571428571
|
|
|
|
key: train_roc_auc
|
|
value: [0.97058824 0.94222689 0.93382353 0.88923395 0.83333333 0.69047619
|
|
0.61904762 0.86645007 0.95896033 0.98529412]
|
|
|
|
mean value: 0.8689434267134558
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.8 0.5 0.625 0.33333333 0.58333333
|
|
0.58333333 0.75 0.625 0.55555556]
|
|
|
|
mean value: 0.6022222222222222
|
|
|
|
key: train_jcc
|
|
value: [0.94117647 0.91549296 0.86764706 0.8028169 0.66666667 0.72631579
|
|
0.68316832 0.75714286 0.92753623 0.97058824]
|
|
|
|
mean value: 0.8258551485859761
|
|
|
|
MCC on Blind test: 0.77
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.10429573 0.08959389 0.08827043 0.08976412 0.09442878 0.09678459
|
|
0.09338832 0.09471059 0.09185529 0.08974409]
|
|
|
|
mean value: 0.09328358173370362
|
|
|
|
key: score_time
|
|
value: [0.01471567 0.01455045 0.01528144 0.01498342 0.01523328 0.01609302
|
|
0.01565933 0.01579595 0.01595545 0.01480484]
|
|
|
|
mean value: 0.015307283401489258
|
|
|
|
key: test_mcc
|
|
value: [0.675 0.84327404 0.53674504 0.84515425 0.47809144 0.83666003
|
|
0.65714286 0.81649658 0.625 0.63245553]
|
|
|
|
mean value: 0.694601977793387
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.84615385 0.92307692 0.76923077 0.91666667 0.75 0.91666667
|
|
0.83333333 0.91666667 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8538461538461538
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.875 0.94117647 0.8 0.92307692 0.8 0.93333333
|
|
0.85714286 0.94117647 0.875 0.88888889]
|
|
|
|
mean value: 0.8834794943618473
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.875 0.88888889 0.85714286 1. 0.75 0.875
|
|
0.85714286 0.88888889 0.875 0.8 ]
|
|
|
|
mean value: 0.8667063492063491
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 1. 0.75 0.85714286 0.85714286 1.
|
|
0.85714286 1. 0.875 1. ]
|
|
|
|
mean value: 0.9071428571428571
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8375 0.9 0.775 0.92857143 0.72857143 0.9
|
|
0.82857143 0.875 0.8125 0.75 ]
|
|
|
|
mean value: 0.8335714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.77777778 0.88888889 0.66666667 0.85714286 0.66666667 0.875
|
|
0.75 0.88888889 0.77777778 0.8 ]
|
|
|
|
mean value: 0.7948809523809524
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.83
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03741598 0.03802299 0.04370952 0.0472815 0.03707099 0.05534434
|
|
0.0436213 0.03756762 0.04712677 0.04655313]
|
|
|
|
mean value: 0.04337141513824463
|
|
|
|
key: score_time
|
|
value: [0.0241375 0.03943944 0.02140975 0.01842046 0.02922559 0.02238369
|
|
0.02023005 0.01715541 0.0278213 0.04356241]
|
|
|
|
mean value: 0.026378560066223144
|
|
|
|
key: test_mcc
|
|
value: [0.85391256 0.50069396 0.675 0.84515425 0.83666003 0.83666003
|
|
0.65714286 0.47809144 0.625 0.81649658]
|
|
|
|
mean value: 0.7124811716290969
|
|
|
|
key: train_mcc
|
|
value: [0.98100984 1. 0.9808378 0.96169772 0.98094082 0.98094082
|
|
0.96169772 1. 0.98111593 0.90773323]
|
|
|
|
mean value: 0.9735973887691677
|
|
|
|
key: test_accuracy
|
|
value: [0.92307692 0.76923077 0.84615385 0.91666667 0.91666667 0.91666667
|
|
0.83333333 0.75 0.83333333 0.91666667]
|
|
|
|
mean value: 0.8621794871794872
|
|
|
|
key: train_accuracy
|
|
value: [0.99090909 1. 0.99090909 0.98198198 0.99099099 0.99099099
|
|
0.98198198 1. 0.99099099 0.95495495]
|
|
|
|
mean value: 0.9873710073710074
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.82352941 0.875 0.92307692 0.93333333 0.93333333
|
|
0.85714286 0.8 0.875 0.94117647]
|
|
|
|
mean value: 0.8894925662572721
|
|
|
|
key: train_fscore
|
|
value: [0.99259259 1. 0.99270073 0.98550725 0.99280576 0.99280576
|
|
0.98550725 1. 0.99270073 0.96240602]
|
|
|
|
mean value: 0.9897026071029191
|
|
|
|
key: test_precision
|
|
value: [1. 0.77777778 0.875 1. 0.875 0.875
|
|
0.85714286 0.85714286 0.875 0.88888889]
|
|
|
|
mean value: 0.888095238095238
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.98550725 0.98550725 0.98571429 0.98571429
|
|
0.98550725 1. 0.98550725 0.98461538]
|
|
|
|
mean value: 0.9898072941551203
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 0.875 0.85714286 1. 1.
|
|
0.85714286 0.75 0.875 1. ]
|
|
|
|
mean value: 0.8964285714285715
|
|
|
|
key: train_recall
|
|
value: [0.98529412 1. 1. 0.98550725 1. 1.
|
|
0.98550725 1. 1. 0.94117647]
|
|
|
|
mean value: 0.9897485080988917
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.7375 0.8375 0.92857143 0.9 0.9
|
|
0.82857143 0.75 0.8125 0.875 ]
|
|
|
|
mean value: 0.8507142857142858
|
|
|
|
key: train_roc_auc
|
|
value: [0.99264706 1. 0.98809524 0.98084886 0.98809524 0.98809524
|
|
0.98084886 1. 0.98837209 0.95896033]
|
|
|
|
mean value: 0.986596291701716
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.7 0.77777778 0.85714286 0.875 0.875
|
|
0.75 0.66666667 0.77777778 0.88888889]
|
|
|
|
mean value: 0.8043253968253968
|
|
|
|
key: train_jcc
|
|
value: [0.98529412 1. 0.98550725 0.97142857 0.98571429 0.98571429
|
|
0.97142857 1. 0.98550725 0.92753623]
|
|
|
|
mean value: 0.9798130556570455
|
|
|
|
MCC on Blind test: 0.83
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03727388 0.03465199 0.0383389 0.0460844 0.04978299 0.04043269
|
|
0.05224276 0.0568285 0.04102182 0.07268953]
|
|
|
|
mean value: 0.04693474769592285
|
|
|
|
key: score_time
|
|
value: [0.02114868 0.02298331 0.02409148 0.02365351 0.02428341 0.02402425
|
|
0.02286696 0.01960135 0.02328157 0.0190928 ]
|
|
|
|
mean value: 0.022502732276916505
|
|
|
|
key: test_mcc
|
|
value: [ 0.15811388 0.15811388 -0.3 0.11952286 -0.23904572 0.11952286
|
|
-0.2548236 -0.40824829 0. 0.35355339]
|
|
|
|
mean value: -0.029329072957256407
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.61538462 0.61538462 0.38461538 0.58333333 0.41666667 0.58333333
|
|
0.5 0.41666667 0.58333333 0.66666667]
|
|
|
|
mean value: 0.5365384615384615
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.70588235 0.5 0.66666667 0.53333333 0.66666667
|
|
0.66666667 0.58823529 0.70588235 0.71428571]
|
|
|
|
mean value: 0.6453501400560224
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.5 0.625 0.5 0.625
|
|
0.54545455 0.55555556 0.66666667 0.83333333]
|
|
|
|
mean value: 0.6184343434343434
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.5 0.71428571 0.57142857 0.71428571
|
|
0.85714286 0.625 0.75 0.625 ]
|
|
|
|
mean value: 0.6857142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.575 0.575 0.35 0.55714286 0.38571429 0.55714286
|
|
0.42857143 0.3125 0.5 0.6875 ]
|
|
|
|
mean value: 0.4928571428571428
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.54545455 0.33333333 0.5 0.36363636 0.5
|
|
0.5 0.41666667 0.54545455 0.55555556]
|
|
|
|
mean value: 0.48055555555555557
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.25822425 0.2295289 0.22716832 0.23751235 0.2359283 0.23824382
|
|
0.24219608 0.22907352 0.23247099 0.22575021]
|
|
|
|
mean value: 0.23560967445373535
|
|
|
|
key: score_time
|
|
value: [0.01001358 0.00984526 0.00975204 0.00899196 0.00957942 0.00982308
|
|
0.00890255 0.00877237 0.00896358 0.00889277]
|
|
|
|
mean value: 0.00935366153717041
|
|
|
|
key: test_mcc
|
|
value: [0.85391256 0.50069396 0.85391256 1. 0.65714286 0.83666003
|
|
0.83666003 0.625 0.81649658 0.81649658]
|
|
|
|
mean value: 0.7796975162586387
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92307692 0.76923077 0.92307692 1. 0.83333333 0.91666667
|
|
0.91666667 0.83333333 0.91666667 0.91666667]
|
|
|
|
mean value: 0.8948717948717949
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.82352941 0.93333333 1. 0.85714286 0.93333333
|
|
0.93333333 0.875 0.94117647 0.94117647]
|
|
|
|
mean value: 0.9171358543417367
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.77777778 1. 1. 0.85714286 0.875
|
|
0.875 0.875 0.88888889 0.88888889]
|
|
|
|
mean value: 0.9037698412698413
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 0.875 1. 0.85714286 1.
|
|
1. 0.875 1. 1. ]
|
|
|
|
mean value: 0.9357142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.7375 0.9375 1. 0.82857143 0.9
|
|
0.9 0.8125 0.875 0.875 ]
|
|
|
|
mean value: 0.8803571428571428
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.7 0.875 1. 0.75 0.875
|
|
0.875 0.77777778 0.88888889 0.88888889]
|
|
|
|
mean value: 0.8505555555555555
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.76
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01249194 0.01529217 0.01575756 0.01598454 0.01709008 0.01641655
|
|
0.01630306 0.01717377 0.01682758 0.0165267 ]
|
|
|
|
mean value: 0.01598639488220215
|
|
|
|
key: score_time
|
|
value: [0.01156449 0.01154613 0.01195145 0.01173139 0.01182127 0.01202178
|
|
0.01203108 0.01218295 0.01476431 0.01200366]
|
|
|
|
mean value: 0.012161850929260254
|
|
|
|
key: test_mcc
|
|
value: [ 0.36514837 0.15811388 -0.05773503 0.52915026 -0.23904572 0.07559289
|
|
0.07559289 -0.125 0.63245553 -0.11952286]
|
|
|
|
mean value: 0.12947502284095339
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.69230769 0.61538462 0.53846154 0.75 0.41666667 0.58333333
|
|
0.58333333 0.5 0.83333333 0.41666667]
|
|
|
|
mean value: 0.592948717948718
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.70588235 0.66666667 0.82352941 0.53333333 0.70588235
|
|
0.70588235 0.625 0.88888889 0.46153846]
|
|
|
|
mean value: 0.6916603821015586
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.6 0.7 0.5 0.6
|
|
0.6 0.625 0.8 0.6 ]
|
|
|
|
mean value: 0.6358333333333334
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.75 0.75 1. 0.57142857 0.85714286
|
|
0.85714286 0.625 1. 0.375 ]
|
|
|
|
mean value: 0.7785714285714286
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 0.575 0.475 0.7 0.38571429 0.52857143
|
|
0.52857143 0.4375 0.75 0.4375 ]
|
|
|
|
mean value: 0.5417857142857143
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.54545455 0.5 0.7 0.36363636 0.54545455
|
|
0.54545455 0.45454545 0.8 0.3 ]
|
|
|
|
mean value: 0.5421212121212121
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.03
|
|
|
|
Accuracy on Blind test: 0.53
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03742433 0.03180718 0.02373123 0.0452702 0.04741025 0.03486013
|
|
0.02837348 0.0303812 0.03403378 0.04727602]
|
|
|
|
mean value: 0.03605678081512451
|
|
|
|
key: score_time
|
|
value: [0.02073669 0.02347708 0.02209353 0.03610921 0.01207447 0.02378178
|
|
0.02210712 0.02429295 0.02334523 0.02435279]
|
|
|
|
mean value: 0.023237085342407225
|
|
|
|
key: test_mcc
|
|
value: [0.15811388 0.84327404 0.35 0.83666003 0.47809144 0.29277002
|
|
0.68313005 0.63245553 0.5976143 0.625 ]
|
|
|
|
mean value: 0.5497109305637226
|
|
|
|
key: train_mcc
|
|
value: [0.96185761 0.96148459 1. 0.96169772 0.98094082 0.94333502
|
|
0.98094082 0.94298433 0.98111593 0.98111593]
|
|
|
|
mean value: 0.9695472782473309
|
|
|
|
key: test_accuracy
|
|
value: [0.61538462 0.92307692 0.69230769 0.91666667 0.75 0.66666667
|
|
0.83333333 0.83333333 0.75 0.83333333]
|
|
|
|
mean value: 0.7814102564102564
|
|
|
|
key: train_accuracy
|
|
value: [0.98181818 0.98181818 1. 0.98198198 0.99099099 0.97297297
|
|
0.99099099 0.97297297 0.99099099 0.99099099]
|
|
|
|
mean value: 0.9855528255528255
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.94117647 0.75 0.93333333 0.8 0.75
|
|
0.875 0.88888889 0.76923077 0.875 ]
|
|
|
|
mean value: 0.8288511814982403
|
|
|
|
key: train_fscore
|
|
value: [0.98550725 0.98529412 1. 0.98550725 0.99280576 0.9787234
|
|
0.99280576 0.97810219 0.99270073 0.99270073]
|
|
|
|
mean value: 0.9884147175082405
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.88888889 0.75 0.875 0.75 0.66666667
|
|
0.77777778 0.8 1. 0.875 ]
|
|
|
|
mean value: 0.8049999999999999
|
|
|
|
key: train_precision
|
|
value: [0.97142857 0.98529412 1. 0.98550725 0.98571429 0.95833333
|
|
0.98571429 0.97101449 0.98550725 0.98550725]
|
|
|
|
mean value: 0.9814020825721593
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 0.75 1. 0.85714286 0.85714286
|
|
1. 1. 0.625 0.875 ]
|
|
|
|
mean value: 0.8714285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 0.98529412 1. 0.98550725 1. 1.
|
|
1. 0.98529412 1. 1. ]
|
|
|
|
mean value: 0.9956095481670929
|
|
|
|
key: test_roc_auc
|
|
value: [0.575 0.9 0.675 0.9 0.72857143 0.62857143
|
|
0.8 0.75 0.8125 0.8125 ]
|
|
|
|
mean value: 0.7582142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.97619048 0.9807423 1. 0.98084886 0.98809524 0.96428571
|
|
0.98809524 0.96939124 0.98837209 0.98837209]
|
|
|
|
mean value: 0.9824393255785631
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.88888889 0.6 0.875 0.66666667 0.6
|
|
0.77777778 0.8 0.625 0.77777778]
|
|
|
|
mean value: 0.7156565656565657
|
|
|
|
key: train_jcc
|
|
value: [0.97142857 0.97101449 1. 0.97142857 0.98571429 0.95833333
|
|
0.98571429 0.95714286 0.98550725 0.98550725]
|
|
|
|
mean value: 0.9771790890269152
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.2219398 0.19439697 0.20206976 0.22562408 0.25222993 0.23339581
|
|
0.14557576 0.18831682 0.2224791 0.22500443]
|
|
|
|
mean value: 0.2111032485961914
|
|
|
|
key: score_time
|
|
value: [0.0220089 0.02060461 0.01972795 0.02387714 0.01905584 0.02754092
|
|
0.01167583 0.02154541 0.02152109 0.01703382]
|
|
|
|
mean value: 0.02045915126800537
|
|
|
|
key: test_mcc
|
|
value: [0.50069396 0.84327404 0.35 1. 0.50709255 0.29277002
|
|
0.68313005 0.81649658 0.5976143 0.25 ]
|
|
|
|
mean value: 0.5841071516952127
|
|
|
|
key: train_mcc
|
|
value: [1. 0.96148459 1. 1. 1. 0.94333502
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9904819609821227
|
|
|
|
key: test_accuracy
|
|
value: [0.76923077 0.92307692 0.69230769 1. 0.75 0.66666667
|
|
0.83333333 0.91666667 0.75 0.66666667]
|
|
|
|
mean value: 0.7967948717948719
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98181818 1. 1. 1. 0.97297297
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9954791154791155
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.94117647 0.75 1. 0.76923077 0.75
|
|
0.875 0.94117647 0.76923077 0.75 ]
|
|
|
|
mean value: 0.8369343891402715
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_7030.py:115: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_7030.py:118: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[1. 0.98529412 1. 1. 1. 0.9787234
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9964017521902377
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.88888889 0.75 1. 0.83333333 0.66666667
|
|
0.77777778 0.88888889 1. 0.75 ]
|
|
|
|
mean value: 0.8333333333333334
|
|
|
|
key: train_precision
|
|
value: [1. 0.98529412 1. 1. 1. 0.95833333
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9943627450980392
|
|
|
|
key: test_recall
|
|
value: [0.875 1. 0.75 1. 0.71428571 0.85714286
|
|
1. 1. 0.625 0.75 ]
|
|
|
|
mean value: 0.8571428571428571
|
|
|
|
key: train_recall
|
|
value: [1. 0.98529412 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9985294117647059
|
|
|
|
key: test_roc_auc
|
|
value: [0.7375 0.9 0.675 1. 0.75714286 0.62857143
|
|
0.8 0.875 0.8125 0.625 ]
|
|
|
|
mean value: 0.7810714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.9807423 1. 1. 1. 0.96428571
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9945028011204482
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.88888889 0.6 1. 0.625 0.6
|
|
0.77777778 0.88888889 0.625 0.6 ]
|
|
|
|
mean value: 0.7305555555555555
|
|
|
|
key: train_jcc
|
|
value: [1. 0.97101449 1. 1. 1. 0.95833333
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9929347826086956
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03014159 0.02186394 0.02349424 0.02804756 0.08671212 0.03004718
|
|
0.02699184 0.08600616 0.02971935 0.0292623 ]
|
|
|
|
mean value: 0.039228630065917966
|
|
|
|
key: score_time
|
|
value: [0.01463342 0.0103395 0.01177096 0.01179242 0.01192975 0.01194787
|
|
0.02830076 0.01288271 0.0126574 0.01292181]
|
|
|
|
mean value: 0.013917660713195801
|
|
|
|
key: test_mcc
|
|
value: [0.5 0.37796447 0.76376262 0.37796447 0.19642857 0.87287156
|
|
1. 0.60714286 0.6000992 0.73214286]
|
|
|
|
mean value: 0.6028376606651662
|
|
|
|
key: train_mcc
|
|
value: [0.88388348 0.91215932 0.88476385 0.85400682 0.86948194 0.8687127
|
|
0.85434012 0.85434012 0.88320546 0.86868474]
|
|
|
|
mean value: 0.873357854621197
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.6875 0.86666667 0.66666667 0.6 0.93333333
|
|
1. 0.8 0.8 0.86666667]
|
|
|
|
mean value: 0.7970833333333334
|
|
|
|
key: train_accuracy
|
|
value: [0.94117647 0.95588235 0.94160584 0.9270073 0.93430657 0.93430657
|
|
0.9270073 0.9270073 0.94160584 0.93430657]
|
|
|
|
mean value: 0.9364212108200944
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.66666667 0.875 0.70588235 0.57142857 0.92307692
|
|
1. 0.8 0.82352941 0.875 ]
|
|
|
|
mean value: 0.7990583925878043
|
|
|
|
key: train_fscore
|
|
value: [0.93939394 0.95522388 0.94029851 0.92753623 0.93333333 0.93430657
|
|
0.92537313 0.92537313 0.94117647 0.93333333]
|
|
|
|
mean value: 0.9355348534592384
|
|
|
|
key: test_precision
|
|
value: [0.75 0.71428571 0.77777778 0.6 0.57142857 1.
|
|
1. 0.85714286 0.77777778 0.875 ]
|
|
|
|
mean value: 0.7923412698412698
|
|
|
|
key: train_precision
|
|
value: [0.96875 0.96969697 0.96923077 0.92753623 0.95454545 0.94117647
|
|
0.93939394 0.93939394 0.94117647 0.94029851]
|
|
|
|
mean value: 0.9491198752784288
|
|
|
|
key: test_recall
|
|
value: [0.75 0.625 1. 0.85714286 0.57142857 0.85714286
|
|
1. 0.75 0.875 0.875 ]
|
|
|
|
mean value: 0.8160714285714286
|
|
|
|
key: train_recall
|
|
value: [0.91176471 0.94117647 0.91304348 0.92753623 0.91304348 0.92753623
|
|
0.91176471 0.91176471 0.94117647 0.92647059]
|
|
|
|
mean value: 0.9225277067348678
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.6875 0.875 0.67857143 0.59821429 0.92857143
|
|
1. 0.80357143 0.79464286 0.86607143]
|
|
|
|
mean value: 0.7982142857142858
|
|
|
|
key: train_roc_auc
|
|
value: [0.94117647 0.95588235 0.94181586 0.92700341 0.93446292 0.93435635
|
|
0.92689685 0.92689685 0.94160273 0.93424979]
|
|
|
|
mean value: 0.9364343563512361
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.5 0.77777778 0.54545455 0.4 0.85714286
|
|
1. 0.66666667 0.7 0.77777778]
|
|
|
|
mean value: 0.6824819624819625
|
|
|
|
key: train_jcc
|
|
value: [0.88571429 0.91428571 0.88732394 0.86486486 0.875 0.87671233
|
|
0.86111111 0.86111111 0.88888889 0.875 ]
|
|
|
|
mean value: 0.8790012248405071
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.8227582 0.69282198 0.90757418 0.8929534 0.79432607 0.66682053
|
|
0.68143749 0.68580031 0.85271406 0.74548149]
|
|
|
|
mean value: 0.7742687702178955
|
|
|
|
key: score_time
|
|
value: [0.0130322 0.01566529 0.0124476 0.01416826 0.01481628 0.01479936
|
|
0.01499033 0.0139854 0.01176405 0.01219392]
|
|
|
|
mean value: 0.01378626823425293
|
|
|
|
key: test_mcc
|
|
value: [0.5 0.62994079 0.73214286 0.37796447 0.37796447 0.6000992
|
|
1. 0.60714286 0.6000992 0.73214286]
|
|
|
|
mean value: 0.6157496702093695
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 0.97080136 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.997080136402387
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.8125 0.86666667 0.66666667 0.66666667 0.8
|
|
1. 0.8 0.8 0.86666667]
|
|
|
|
mean value: 0.8029166666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 0.98540146 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9985401459854014
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.8 0.85714286 0.70588235 0.70588235 0.76923077
|
|
1. 0.8 0.82352941 0.875 ]
|
|
|
|
mean value: 0.8086667744020685
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 0.98550725 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9985507246376811
|
|
|
|
key: test_precision
|
|
value: [0.75 0.85714286 0.85714286 0.6 0.6 0.83333333
|
|
1. 0.85714286 0.77777778 0.875 ]
|
|
|
|
mean value: 0.8007539682539683
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 0.98550725 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9985507246376811
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.85714286 0.85714286 0.85714286 0.71428571
|
|
1. 0.75 0.875 0.875 ]
|
|
|
|
mean value: 0.8285714285714285
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.98550725 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9985507246376811
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.8125 0.86607143 0.67857143 0.67857143 0.79464286
|
|
1. 0.80357143 0.79464286 0.86607143]
|
|
|
|
mean value: 0.8044642857142857
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 0.98540068 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9985400682011936
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.66666667 0.75 0.54545455 0.54545455 0.625
|
|
1. 0.66666667 0.7 0.77777778]
|
|
|
|
mean value: 0.6877020202020202
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 0.97142857 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9971428571428571
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01229715 0.01017547 0.00983405 0.00973582 0.00940228 0.00935841
|
|
0.00854301 0.00982714 0.00898814 0.00932932]
|
|
|
|
mean value: 0.009749078750610351
|
|
|
|
key: score_time
|
|
value: [0.01544142 0.00982952 0.00961089 0.00950384 0.0091722 0.00923371
|
|
0.00845122 0.00983572 0.00892806 0.00875449]
|
|
|
|
mean value: 0.009876108169555664
|
|
|
|
key: test_mcc
|
|
value: [0.16012815 0.37796447 0.37796447 0.13363062 0.09449112 0.13363062
|
|
0.64465837 0.47245559 0.53452248 0.64465837]
|
|
|
|
mean value: 0.3574104277515264
|
|
|
|
key: train_mcc
|
|
value: [0.55346778 0.56668805 0.52625998 0.53840676 0.6802431 0.41306312
|
|
0.4644939 0.57097169 0.57327313 0.54116421]
|
|
|
|
mean value: 0.5428031730219792
|
|
|
|
key: test_accuracy
|
|
value: [0.5625 0.6875 0.66666667 0.53333333 0.53333333 0.53333333
|
|
0.8 0.73333333 0.73333333 0.8 ]
|
|
|
|
mean value: 0.6583333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.75735294 0.77205882 0.73722628 0.74452555 0.83211679 0.66423358
|
|
0.7080292 0.75182482 0.75912409 0.74452555]
|
|
|
|
mean value: 0.7471017604121941
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.70588235 0.70588235 0.63157895 0.58823529 0.63157895
|
|
0.84210526 0.77777778 0.8 0.84210526]
|
|
|
|
mean value: 0.7191812865497076
|
|
|
|
key: train_fscore
|
|
value: [0.79503106 0.8 0.78571429 0.79041916 0.8496732 0.74444444
|
|
0.75903614 0.79761905 0.8 0.78787879]
|
|
|
|
mean value: 0.7909816130426526
|
|
|
|
key: test_precision
|
|
value: [0.53846154 0.66666667 0.6 0.5 0.5 0.5
|
|
0.72727273 0.7 0.66666667 0.72727273]
|
|
|
|
mean value: 0.6126340326340326
|
|
|
|
key: train_precision
|
|
value: [0.68817204 0.71264368 0.66666667 0.67346939 0.77380952 0.6036036
|
|
0.64285714 0.67 0.68041237 0.67010309]
|
|
|
|
mean value: 0.6781737509781237
|
|
|
|
key: test_recall
|
|
value: [0.875 0.75 0.85714286 0.85714286 0.71428571 0.85714286
|
|
1. 0.875 1. 1. ]
|
|
|
|
mean value: 0.8785714285714286
|
|
|
|
key: train_recall
|
|
value: [0.94117647 0.91176471 0.95652174 0.95652174 0.94202899 0.97101449
|
|
0.92647059 0.98529412 0.97058824 0.95588235]
|
|
|
|
mean value: 0.9517263427109974
|
|
|
|
key: test_roc_auc
|
|
value: [0.5625 0.6875 0.67857143 0.55357143 0.54464286 0.55357143
|
|
0.78571429 0.72321429 0.71428571 0.78571429]
|
|
|
|
mean value: 0.6589285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.75735294 0.77205882 0.73561381 0.74296675 0.83130861 0.66197783
|
|
0.70961211 0.75351662 0.76065644 0.74605712]
|
|
|
|
mean value: 0.7471121057118499
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.54545455 0.54545455 0.46153846 0.41666667 0.46153846
|
|
0.72727273 0.63636364 0.66666667 0.72727273]
|
|
|
|
mean value: 0.5688228438228439
|
|
|
|
key: train_jcc
|
|
value: [0.65979381 0.66666667 0.64705882 0.65346535 0.73863636 0.59292035
|
|
0.61165049 0.66336634 0.66666667 0.65 ]
|
|
|
|
mean value: 0.655022485751961
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00888562 0.00865746 0.00872469 0.00866175 0.01014066 0.00909781
|
|
0.00875616 0.00944185 0.0097394 0.00969982]
|
|
|
|
mean value: 0.009180521965026856
|
|
|
|
key: score_time
|
|
value: [0.00861907 0.00856471 0.00868225 0.00857139 0.0086987 0.00855017
|
|
0.00847864 0.01015139 0.00926304 0.00956607]
|
|
|
|
mean value: 0.008914542198181153
|
|
|
|
key: test_mcc
|
|
value: [ 0.13483997 0.57735027 -0.13363062 -0.07142857 0.18898224 -0.13363062
|
|
0.09449112 0.46770717 0.49099025 0. ]
|
|
|
|
mean value: 0.16156712094759243
|
|
|
|
key: train_mcc
|
|
value: [0.49671579 0.40057725 0.53887054 0.51413472 0.4644939 0.43214227
|
|
0.42270073 0.48120986 0.45617471 0.4221034 ]
|
|
|
|
mean value: 0.4629123161174943
|
|
|
|
key: test_accuracy
|
|
value: [0.5625 0.75 0.46666667 0.46666667 0.6 0.46666667
|
|
0.53333333 0.66666667 0.73333333 0.46666667]
|
|
|
|
mean value: 0.57125
|
|
|
|
key: train_accuracy
|
|
value: [0.72794118 0.68382353 0.75182482 0.73722628 0.7080292 0.69343066
|
|
0.68613139 0.71532847 0.70072993 0.69343066]
|
|
|
|
mean value: 0.7097896092743667
|
|
|
|
key: test_fscore
|
|
value: [0.46153846 0.66666667 0.2 0.42857143 0.5 0.2
|
|
0.46153846 0.54545455 0.71428571 0. ]
|
|
|
|
mean value: 0.4178055278055278
|
|
|
|
key: train_fscore
|
|
value: [0.66055046 0.60550459 0.70175439 0.67857143 0.62962963 0.61111111
|
|
0.58252427 0.62857143 0.60194175 0.61111111]
|
|
|
|
mean value: 0.6311270160248657
|
|
|
|
key: test_precision
|
|
value: [0.6 1. 0.33333333 0.42857143 0.6 0.33333333
|
|
0.6 1. 0.83333333 0. ]
|
|
|
|
mean value: 0.5728571428571428
|
|
|
|
key: train_precision
|
|
value: [0.87804878 0.80487805 0.88888889 0.88372093 0.87179487 0.84615385
|
|
0.85714286 0.89189189 0.88571429 0.825 ]
|
|
|
|
mean value: 0.8633234401087493
|
|
|
|
key: test_recall
|
|
value: [0.375 0.5 0.14285714 0.42857143 0.42857143 0.14285714
|
|
0.375 0.375 0.625 0. ]
|
|
|
|
mean value: 0.3392857142857143
|
|
|
|
key: train_recall
|
|
value: [0.52941176 0.48529412 0.57971014 0.55072464 0.49275362 0.47826087
|
|
0.44117647 0.48529412 0.45588235 0.48529412]
|
|
|
|
mean value: 0.49838022165387896
|
|
|
|
key: test_roc_auc
|
|
value: [0.5625 0.75 0.44642857 0.46428571 0.58928571 0.44642857
|
|
0.54464286 0.6875 0.74107143 0.5 ]
|
|
|
|
mean value: 0.5732142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.72794118 0.68382353 0.75309037 0.73859761 0.70961211 0.69501279
|
|
0.68435635 0.71366155 0.69895567 0.69192242]
|
|
|
|
mean value: 0.709697357203751
|
|
|
|
key: test_jcc
|
|
value: [0.3 0.5 0.11111111 0.27272727 0.33333333 0.11111111
|
|
0.3 0.375 0.55555556 0. ]
|
|
|
|
mean value: 0.28588383838383835
|
|
|
|
key: train_jcc
|
|
value: [0.49315068 0.43421053 0.54054054 0.51351351 0.45945946 0.44
|
|
0.4109589 0.45833333 0.43055556 0.44 ]
|
|
|
|
mean value: 0.46207225177592876
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00963187 0.01003337 0.00963426 0.00960588 0.00966525 0.009413
|
|
0.01002789 0.01109171 0.00847435 0.0094924 ]
|
|
|
|
mean value: 0.009706997871398925
|
|
|
|
key: score_time
|
|
value: [0.01067805 0.01061988 0.01072097 0.01038003 0.01093984 0.01020074
|
|
0.01073027 0.01040125 0.00999022 0.01269126]
|
|
|
|
mean value: 0.010735249519348145
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0. 0.18898224 0.18898224 -0.56407607 0.04029115
|
|
0.46770717 0.09449112 0.19642857 0.36689969]
|
|
|
|
mean value: 0.09797061022730223
|
|
|
|
key: train_mcc
|
|
value: [0.52157537 0.48089047 0.44223491 0.5228792 0.51877014 0.38868777
|
|
0.4470887 0.41148017 0.36257479 0.4540104 ]
|
|
|
|
mean value: 0.4550191926560739
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.5 0.6 0.6 0.26666667 0.53333333
|
|
0.66666667 0.53333333 0.6 0.6 ]
|
|
|
|
mean value: 0.54
|
|
|
|
key: train_accuracy
|
|
value: [0.75735294 0.73529412 0.71532847 0.75182482 0.75182482 0.68613139
|
|
0.71532847 0.69343066 0.67153285 0.72262774]
|
|
|
|
mean value: 0.7200676255903822
|
|
|
|
key: test_fscore
|
|
value: [0.33333333 0.33333333 0.5 0.5 0. 0.36363636
|
|
0.54545455 0.46153846 0.625 0.4 ]
|
|
|
|
mean value: 0.4062296037296037
|
|
|
|
key: train_fscore
|
|
value: [0.736 0.70491803 0.68292683 0.71666667 0.72131148 0.63865546
|
|
0.66666667 0.625 0.60176991 0.68852459]
|
|
|
|
mean value: 0.678243963465158
|
|
|
|
key: test_precision
|
|
value: [0.5 0.5 0.6 0.6 0. 0.5 1. 0.6 0.625 1. ]
|
|
|
|
mean value: 0.5925
|
|
|
|
key: train_precision
|
|
value: [0.80701754 0.7962963 0.77777778 0.84313725 0.83018868 0.76
|
|
0.79591837 0.79545455 0.75555556 0.77777778]
|
|
|
|
mean value: 0.7939123798215785
|
|
|
|
key: test_recall
|
|
value: [0.25 0.25 0.42857143 0.42857143 0. 0.28571429
|
|
0.375 0.375 0.625 0.25 ]
|
|
|
|
mean value: 0.3267857142857143
|
|
|
|
key: train_recall
|
|
value: [0.67647059 0.63235294 0.60869565 0.62318841 0.63768116 0.55072464
|
|
0.57352941 0.51470588 0.5 0.61764706]
|
|
|
|
mean value: 0.5934995737425405
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.58928571 0.58928571 0.25 0.51785714
|
|
0.6875 0.54464286 0.59821429 0.625 ]
|
|
|
|
mean value: 0.5401785714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.75735294 0.73529412 0.71611253 0.75277067 0.75266411 0.68712702
|
|
0.71430094 0.69213555 0.67028986 0.72186701]
|
|
|
|
mean value: 0.7199914748508098
|
|
|
|
key: test_jcc
|
|
value: [0.2 0.2 0.33333333 0.33333333 0. 0.22222222
|
|
0.375 0.3 0.45454545 0.25 ]
|
|
|
|
mean value: 0.2668434343434343
|
|
|
|
key: train_jcc
|
|
value: [0.58227848 0.5443038 0.51851852 0.55844156 0.56410256 0.4691358
|
|
0.5 0.45454545 0.43037975 0.525 ]
|
|
|
|
mean value: 0.5146705923393687
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01341534 0.01017046 0.01016378 0.0106473 0.01015735 0.01011944
|
|
0.01013803 0.0111177 0.01139235 0.01143646]
|
|
|
|
mean value: 0.010875821113586426
|
|
|
|
key: score_time
|
|
value: [0.01139927 0.00892091 0.00900245 0.00960231 0.00909448 0.00901842
|
|
0.00965714 0.00929952 0.00991797 0.00989413]
|
|
|
|
mean value: 0.009580659866333007
|
|
|
|
key: test_mcc
|
|
value: [0.37796447 0.51639778 0.6000992 0.09449112 0.07142857 0.46428571
|
|
0.60714286 0.33928571 0.47245559 0.6000992 ]
|
|
|
|
mean value: 0.41436502154582056
|
|
|
|
key: train_mcc
|
|
value: [0.78357455 0.808911 0.76781966 0.76951433 0.83951407 0.75191816
|
|
0.71056167 0.76668815 0.812277 0.75245474]
|
|
|
|
mean value: 0.7763233321347237
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.75 0.8 0.53333333 0.53333333 0.73333333
|
|
0.8 0.66666667 0.73333333 0.8 ]
|
|
|
|
mean value: 0.70375
|
|
|
|
key: train_accuracy
|
|
value: [0.88970588 0.90441176 0.88321168 0.88321168 0.91970803 0.87591241
|
|
0.8540146 0.88321168 0.90510949 0.87591241]
|
|
|
|
mean value: 0.8874409617861744
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.71428571 0.76923077 0.58823529 0.53333333 0.71428571
|
|
0.8 0.66666667 0.77777778 0.82352941]
|
|
|
|
mean value: 0.7054011348128995
|
|
|
|
key: train_fscore
|
|
value: [0.88372093 0.9037037 0.88059701 0.87878788 0.91970803 0.87591241
|
|
0.84615385 0.88059701 0.90076336 0.87218045]
|
|
|
|
mean value: 0.8842124636591383
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.83333333 0.83333333 0.5 0.5 0.71428571
|
|
0.85714286 0.71428571 0.7 0.77777778]
|
|
|
|
mean value: 0.7144444444444444
|
|
|
|
key: train_precision
|
|
value: [0.93442623 0.91044776 0.90769231 0.92063492 0.92647059 0.88235294
|
|
0.88709677 0.89393939 0.93650794 0.89230769]
|
|
|
|
mean value: 0.9091876545389791
|
|
|
|
key: test_recall
|
|
value: [0.625 0.625 0.71428571 0.71428571 0.57142857 0.71428571
|
|
0.75 0.625 0.875 0.875 ]
|
|
|
|
mean value: 0.7089285714285715
|
|
|
|
key: train_recall
|
|
value: [0.83823529 0.89705882 0.85507246 0.84057971 0.91304348 0.86956522
|
|
0.80882353 0.86764706 0.86764706 0.85294118]
|
|
|
|
mean value: 0.8610613810741687
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.75 0.79464286 0.54464286 0.53571429 0.73214286
|
|
0.80357143 0.66964286 0.72321429 0.79464286]
|
|
|
|
mean value: 0.7035714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [0.88970588 0.90441176 0.88341858 0.88352515 0.91975703 0.87595908
|
|
0.85368713 0.88309889 0.90483802 0.87574595]
|
|
|
|
mean value: 0.8874147485080989
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.55555556 0.625 0.41666667 0.36363636 0.55555556
|
|
0.66666667 0.5 0.63636364 0.7 ]
|
|
|
|
mean value: 0.5519444444444445
|
|
|
|
key: train_jcc
|
|
value: [0.79166667 0.82432432 0.78666667 0.78378378 0.85135135 0.77922078
|
|
0.73333333 0.78666667 0.81944444 0.77333333]
|
|
|
|
mean value: 0.792979134979135
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.57006025 0.7666266 0.59402704 0.56833172 0.68713737 0.53782296
|
|
0.62615681 0.55892277 0.80864668 0.57434893]
|
|
|
|
mean value: 0.6292081117630005
|
|
|
|
key: score_time
|
|
value: [0.01199007 0.01199818 0.01201677 0.01193786 0.01225781 0.01198459
|
|
0.01220012 0.01226211 0.01315689 0.01281214]
|
|
|
|
mean value: 0.012261652946472168
|
|
|
|
key: test_mcc
|
|
value: [0.25 0.40451992 0.73214286 0.33928571 0.26189246 0.47245559
|
|
0.60714286 0.21821789 0.32732684 0.56407607]
|
|
|
|
mean value: 0.41770602010269037
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.6875 0.86666667 0.66666667 0.6 0.73333333
|
|
0.8 0.6 0.66666667 0.73333333]
|
|
|
|
mean value: 0.6979166666666666
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.61538462 0.85714286 0.66666667 0.66666667 0.66666667
|
|
0.8 0.57142857 0.70588235 0.66666667]
|
|
|
|
mean value: 0.6841505063563886
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.625 0.8 0.85714286 0.625 0.54545455 0.8
|
|
0.85714286 0.66666667 0.66666667 1. ]
|
|
|
|
mean value: 0.7443073593073593
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.625 0.5 0.85714286 0.71428571 0.85714286 0.57142857
|
|
0.75 0.5 0.75 0.5 ]
|
|
|
|
mean value: 0.6625
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.6875 0.86607143 0.66964286 0.61607143 0.72321429
|
|
0.80357143 0.60714286 0.66071429 0.75 ]
|
|
|
|
mean value: 0.7008928571428572
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.44444444 0.75 0.5 0.5 0.5
|
|
0.66666667 0.4 0.54545455 0.5 ]
|
|
|
|
mean value: 0.5261111111111111
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01563907 0.01531076 0.01406741 0.01255441 0.01173449 0.01213789
|
|
0.01214099 0.01335692 0.01327562 0.01399612]
|
|
|
|
mean value: 0.013421368598937989
|
|
|
|
key: score_time
|
|
value: [0.01189351 0.00997782 0.0094521 0.00942326 0.00919056 0.0087719
|
|
0.00899625 0.00957489 0.00948572 0.0095222 ]
|
|
|
|
mean value: 0.009628820419311523
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.62994079 0.56407607 0.6000992 0.75592895 0.875
|
|
0.87287156 0.66143783 1. 0.73214286]
|
|
|
|
mean value: 0.7321438041535598
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.8125 0.73333333 0.8 0.86666667 0.93333333
|
|
0.93333333 0.8 1. 0.86666667]
|
|
|
|
mean value: 0.8558333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.82352941 0.77777778 0.76923077 0.83333333 0.93333333
|
|
0.94117647 0.76923077 1. 0.875 ]
|
|
|
|
mean value: 0.8522611865258924
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.77777778 0.63636364 0.83333333 1. 0.875
|
|
0.88888889 1. 1. 0.875 ]
|
|
|
|
mean value: 0.8743506493506493
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.875 1. 0.71428571 0.71428571 1.
|
|
1. 0.625 1. 0.875 ]
|
|
|
|
mean value: 0.8553571428571428
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.8125 0.75 0.79464286 0.85714286 0.9375
|
|
0.92857143 0.8125 1. 0.86607143]
|
|
|
|
mean value: 0.8571428571428572
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.7 0.63636364 0.625 0.71428571 0.875
|
|
0.88888889 0.625 1. 0.77777778]
|
|
|
|
mean value: 0.7508982683982683
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.83
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08946228 0.09461117 0.09797263 0.09215832 0.09182549 0.0985148
|
|
0.09922361 0.09643054 0.08978391 0.0917778 ]
|
|
|
|
mean value: 0.09417605400085449
|
|
|
|
key: score_time
|
|
value: [0.01735353 0.01883793 0.01911259 0.01854706 0.01913691 0.01901317
|
|
0.0190115 0.01745701 0.01824975 0.0189743 ]
|
|
|
|
mean value: 0.018569374084472658
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.5 0.33928571 0.07142857 0.21821789 0.75592895
|
|
0.60714286 0.49099025 0.6000992 0.6000992 ]
|
|
|
|
mean value: 0.48131334167892426
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.75 0.66666667 0.53333333 0.6 0.86666667
|
|
0.8 0.73333333 0.8 0.8 ]
|
|
|
|
mean value: 0.73625
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.75 0.66666667 0.53333333 0.625 0.83333333
|
|
0.8 0.71428571 0.82352941 0.82352941]
|
|
|
|
mean value: 0.7369677871148459
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.75 0.625 0.5 0.55555556 1.
|
|
0.85714286 0.83333333 0.77777778 0.77777778]
|
|
|
|
mean value: 0.7533730158730159
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.71428571 0.57142857 0.71428571 0.71428571
|
|
0.75 0.625 0.875 0.875 ]
|
|
|
|
mean value: 0.7339285714285715
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.75 0.66964286 0.53571429 0.60714286 0.85714286
|
|
0.80357143 0.74107143 0.79464286 0.79464286]
|
|
|
|
mean value: 0.7366071428571429
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.6 0.5 0.36363636 0.45454545 0.71428571
|
|
0.66666667 0.55555556 0.7 0.7 ]
|
|
|
|
mean value: 0.5921356421356421
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.009372 0.01048779 0.00980043 0.01002073 0.0098989 0.01009226
|
|
0.00951982 0.00987816 0.01003814 0.00992441]
|
|
|
|
mean value: 0.009903264045715333
|
|
|
|
key: score_time
|
|
value: [0.00917506 0.0095048 0.00926566 0.00949192 0.00945592 0.00941968
|
|
0.0089457 0.00947285 0.00952101 0.00941801]
|
|
|
|
mean value: 0.009367060661315919
|
|
|
|
key: test_mcc
|
|
value: [ 0.25 0.13483997 0.47245559 0.32732684 0.18898224 0.21821789
|
|
0.46428571 0.07142857 -0.07142857 0.33928571]
|
|
|
|
mean value: 0.23953939544202055
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.5625 0.73333333 0.66666667 0.6 0.6
|
|
0.73333333 0.53333333 0.46666667 0.66666667]
|
|
|
|
mean value: 0.61875
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.63157895 0.66666667 0.61538462 0.5 0.625
|
|
0.75 0.53333333 0.5 0.66666667]
|
|
|
|
mean value: 0.6113630229419703
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.625 0.54545455 0.8 0.66666667 0.6 0.55555556
|
|
0.75 0.57142857 0.5 0.71428571]
|
|
|
|
mean value: 0.6328391053391054
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.625 0.75 0.57142857 0.57142857 0.42857143 0.71428571
|
|
0.75 0.5 0.5 0.625 ]
|
|
|
|
mean value: 0.6035714285714285
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.5625 0.72321429 0.66071429 0.58928571 0.60714286
|
|
0.73214286 0.53571429 0.46428571 0.66964286]
|
|
|
|
mean value: 0.6169642857142857
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.46153846 0.5 0.44444444 0.33333333 0.45454545
|
|
0.6 0.36363636 0.33333333 0.5 ]
|
|
|
|
mean value: 0.44453768453768455
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.24393058 1.24930692 1.20413995 1.24257207 1.28053522 1.26769328
|
|
1.27477527 1.21029258 1.16360927 1.15652037]
|
|
|
|
mean value: 1.2293375492095948
|
|
|
|
key: score_time
|
|
value: [0.09602118 0.09702611 0.09591722 0.09548616 0.09616089 0.09619951
|
|
0.09740233 0.09107471 0.08758807 0.09058619]
|
|
|
|
mean value: 0.09434623718261718
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.37796447 0.60714286 0.19642857 0.33928571 0.6000992
|
|
0.87287156 0.6000992 0.60714286 0.75592895]
|
|
|
|
mean value: 0.5586904164618322
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.6875 0.8 0.6 0.66666667 0.8
|
|
0.93333333 0.8 0.8 0.86666667]
|
|
|
|
mean value: 0.7766666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.70588235 0.8 0.57142857 0.66666667 0.76923077
|
|
0.94117647 0.82352941 0.8 0.88888889]
|
|
|
|
mean value: 0.7766803131509014
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.66666667 0.75 0.57142857 0.625 0.83333333
|
|
0.88888889 0.77777778 0.85714286 0.8 ]
|
|
|
|
mean value: 0.7627380952380952
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.85714286 0.57142857 0.71428571 0.71428571
|
|
1. 0.875 0.75 1. ]
|
|
|
|
mean value: 0.7982142857142858
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.6875 0.80357143 0.59821429 0.66964286 0.79464286
|
|
0.92857143 0.79464286 0.80357143 0.85714286]
|
|
|
|
mean value: 0.775
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.66666667 0.54545455 0.66666667 0.4 0.5 0.625
|
|
0.88888889 0.7 0.66666667 0.8 ]
|
|
|
|
mean value: 0.6459343434343434
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.82620525 0.91172338 0.95534968 0.86484671 0.87638474 0.8626883
|
|
0.84406471 0.89312744 0.88701892 0.92981744]
|
|
|
|
mean value: 0.8851226568222046
|
|
|
|
key: score_time
|
|
value: [0.21748114 0.14548421 0.18888092 0.11482406 0.20593166 0.22152257
|
|
0.24708962 0.18885756 0.23258686 0.23171401]
|
|
|
|
mean value: 0.19943726062774658
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.5 0.73214286 0.33928571 0.07142857 0.64465837
|
|
0.87287156 0.6000992 0.73214286 0.75592895]
|
|
|
|
mean value: 0.5878498864680419
|
|
|
|
key: train_mcc
|
|
value: [0.91176471 0.94158382 0.89863497 0.94160273 0.94160273 0.8978896
|
|
0.91281179 0.94160273 0.92791659 0.94201665]
|
|
|
|
mean value: 0.9257426297546791
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.75 0.86666667 0.66666667 0.53333333 0.8
|
|
0.93333333 0.8 0.86666667 0.86666667]
|
|
|
|
mean value: 0.7895833333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.95588235 0.97058824 0.94890511 0.97080292 0.97080292 0.94890511
|
|
0.95620438 0.97080292 0.96350365 0.97080292]
|
|
|
|
mean value: 0.9627200515242593
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.75 0.85714286 0.66666667 0.53333333 0.72727273
|
|
0.94117647 0.82352941 0.875 0.88888889]
|
|
|
|
mean value: 0.7863010355657415
|
|
|
|
key: train_fscore
|
|
value: [0.95588235 0.97101449 0.95035461 0.97101449 0.97101449 0.94964029
|
|
0.95652174 0.97058824 0.96402878 0.97101449]
|
|
|
|
mean value: 0.9631073973057501
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.75 0.85714286 0.625 0.5 1.
|
|
0.88888889 0.77777778 0.875 0.8 ]
|
|
|
|
mean value: 0.7930952380952381
|
|
|
|
key: train_precision
|
|
value: [0.95588235 0.95714286 0.93055556 0.97101449 0.97101449 0.94285714
|
|
0.94285714 0.97058824 0.94366197 0.95714286]
|
|
|
|
mean value: 0.9542717101129082
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.85714286 0.71428571 0.57142857 0.57142857
|
|
1. 0.875 0.875 1. ]
|
|
|
|
mean value: 0.7964285714285714
|
|
|
|
key: train_recall
|
|
value: [0.95588235 0.98529412 0.97101449 0.97101449 0.97101449 0.95652174
|
|
0.97058824 0.97058824 0.98529412 0.98529412]
|
|
|
|
mean value: 0.9722506393861893
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.75 0.86607143 0.66964286 0.53571429 0.78571429
|
|
0.92857143 0.79464286 0.86607143 0.85714286]
|
|
|
|
mean value: 0.7866071428571428
|
|
|
|
key: train_roc_auc
|
|
value: [0.95588235 0.97058824 0.94874254 0.97080136 0.97080136 0.9488491
|
|
0.95630861 0.97080136 0.96366155 0.97090793]
|
|
|
|
mean value: 0.962734441602728
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.6 0.75 0.5 0.36363636 0.57142857
|
|
0.88888889 0.7 0.77777778 0.8 ]
|
|
|
|
mean value: 0.6618398268398268
|
|
|
|
key: train_jcc
|
|
value: [0.91549296 0.94366197 0.90540541 0.94366197 0.94366197 0.90410959
|
|
0.91666667 0.94285714 0.93055556 0.94366197]
|
|
|
|
mean value: 0.9289735204596289
|
|
|
|
MCC on Blind test: 0.76
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02292871 0.00963354 0.01028848 0.0096736 0.00978112 0.00908804
|
|
0.00900626 0.00889254 0.00899506 0.00930715]
|
|
|
|
mean value: 0.010759449005126953
|
|
|
|
key: score_time
|
|
value: [0.0128572 0.00932097 0.00932884 0.00900364 0.00890708 0.00880337
|
|
0.0087707 0.00877452 0.00871944 0.00882864]
|
|
|
|
mean value: 0.009331440925598145
|
|
|
|
key: test_mcc
|
|
value: [ 0.13483997 0.57735027 -0.13363062 -0.07142857 0.18898224 -0.13363062
|
|
0.09449112 0.46770717 0.49099025 0. ]
|
|
|
|
mean value: 0.16156712094759243
|
|
|
|
key: train_mcc
|
|
value: [0.49671579 0.40057725 0.53887054 0.51413472 0.4644939 0.43214227
|
|
0.42270073 0.48120986 0.45617471 0.4221034 ]
|
|
|
|
mean value: 0.4629123161174943
|
|
|
|
key: test_accuracy
|
|
value: [0.5625 0.75 0.46666667 0.46666667 0.6 0.46666667
|
|
0.53333333 0.66666667 0.73333333 0.46666667]
|
|
|
|
mean value: 0.57125
|
|
|
|
key: train_accuracy
|
|
value: [0.72794118 0.68382353 0.75182482 0.73722628 0.7080292 0.69343066
|
|
0.68613139 0.71532847 0.70072993 0.69343066]
|
|
|
|
mean value: 0.7097896092743667
|
|
|
|
key: test_fscore
|
|
value: [0.46153846 0.66666667 0.2 0.42857143 0.5 0.2
|
|
0.46153846 0.54545455 0.71428571 0. ]
|
|
|
|
mean value: 0.4178055278055278
|
|
|
|
key: train_fscore
|
|
value: [0.66055046 0.60550459 0.70175439 0.67857143 0.62962963 0.61111111
|
|
0.58252427 0.62857143 0.60194175 0.61111111]
|
|
|
|
mean value: 0.6311270160248657
|
|
|
|
key: test_precision
|
|
value: [0.6 1. 0.33333333 0.42857143 0.6 0.33333333
|
|
0.6 1. 0.83333333 0. ]
|
|
|
|
mean value: 0.5728571428571428
|
|
|
|
key: train_precision
|
|
value: [0.87804878 0.80487805 0.88888889 0.88372093 0.87179487 0.84615385
|
|
0.85714286 0.89189189 0.88571429 0.825 ]
|
|
|
|
mean value: 0.8633234401087493
|
|
|
|
key: test_recall
|
|
value: [0.375 0.5 0.14285714 0.42857143 0.42857143 0.14285714
|
|
0.375 0.375 0.625 0. ]
|
|
|
|
mean value: 0.3392857142857143
|
|
|
|
key: train_recall
|
|
value: [0.52941176 0.48529412 0.57971014 0.55072464 0.49275362 0.47826087
|
|
0.44117647 0.48529412 0.45588235 0.48529412]
|
|
|
|
mean value: 0.49838022165387896
|
|
|
|
key: test_roc_auc
|
|
value: [0.5625 0.75 0.44642857 0.46428571 0.58928571 0.44642857
|
|
0.54464286 0.6875 0.74107143 0.5 ]
|
|
|
|
mean value: 0.5732142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.72794118 0.68382353 0.75309037 0.73859761 0.70961211 0.69501279
|
|
0.68435635 0.71366155 0.69895567 0.69192242]
|
|
|
|
mean value: 0.709697357203751
|
|
|
|
key: test_jcc
|
|
value: [0.3 0.5 0.11111111 0.27272727 0.33333333 0.11111111
|
|
0.3 0.375 0.55555556 0. ]
|
|
|
|
mean value: 0.28588383838383835
|
|
|
|
key: train_jcc
|
|
value: [0.49315068 0.43421053 0.54054054 0.51351351 0.45945946 0.44
|
|
0.4109589 0.45833333 0.43055556 0.44 ]
|
|
|
|
mean value: 0.46207225177592876
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.07995605 0.06297541 0.06544518 0.05764937 0.06188583 0.0586648
|
|
0.12230611 0.04721975 0.09068012 0.04617763]
|
|
|
|
mean value: 0.06929602622985839
|
|
|
|
key: score_time
|
|
value: [0.01092577 0.0114212 0.01099992 0.01117563 0.0114274 0.01075983
|
|
0.01074362 0.01329684 0.01136494 0.01041007]
|
|
|
|
mean value: 0.011252522468566895
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.8819171 0.49099025 0.46428571 0.6000992 0.87287156
|
|
0.87287156 0.875 1. 0.87287156]
|
|
|
|
mean value: 0.7560847740334493
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.9375 0.73333333 0.73333333 0.8 0.93333333
|
|
0.93333333 0.93333333 1. 0.93333333]
|
|
|
|
mean value: 0.875
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.94117647 0.75 0.71428571 0.76923077 0.92307692
|
|
0.94117647 0.93333333 1. 0.94117647]
|
|
|
|
mean value: 0.8713456151691445
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.88888889 0.66666667 0.71428571 0.83333333 1.
|
|
0.88888889 1. 1. 0.88888889]
|
|
|
|
mean value: 0.8738095238095238
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 0.85714286 0.71428571 0.71428571 0.85714286
|
|
1. 0.875 1. 1. ]
|
|
|
|
mean value: 0.8767857142857143
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.9375 0.74107143 0.73214286 0.79464286 0.92857143
|
|
0.92857143 0.9375 1. 0.92857143]
|
|
|
|
mean value: 0.8741071428571429
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.88888889 0.6 0.55555556 0.625 0.85714286
|
|
0.88888889 0.875 1. 0.88888889]
|
|
|
|
mean value: 0.7846031746031745
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.86
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0330348 0.03623557 0.02312398 0.04735804 0.04877901 0.05488276
|
|
0.06896925 0.05981469 0.06169152 0.03810787]
|
|
|
|
mean value: 0.04719974994659424
|
|
|
|
key: score_time
|
|
value: [0.022084 0.01298809 0.01256323 0.01184058 0.01575994 0.02507162
|
|
0.02618265 0.02413273 0.03429985 0.0211916 ]
|
|
|
|
mean value: 0.02061142921447754
|
|
|
|
key: test_mcc
|
|
value: [0.51639778 0.12598816 0.46428571 0.73214286 0.19642857 0.47245559
|
|
0.37796447 0.76376262 0.33928571 0.26189246]
|
|
|
|
mean value: 0.4250603937711905
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.5625 0.73333333 0.86666667 0.6 0.73333333
|
|
0.66666667 0.86666667 0.66666667 0.6 ]
|
|
|
|
mean value: 0.7045833333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.53333333 0.71428571 0.85714286 0.57142857 0.66666667
|
|
0.61538462 0.85714286 0.66666667 0.5 ]
|
|
|
|
mean value: 0.6696336996336996
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.57142857 0.71428571 0.85714286 0.57142857 0.8
|
|
0.8 1. 0.71428571 0.75 ]
|
|
|
|
mean value: 0.7611904761904762
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.625 0.5 0.71428571 0.85714286 0.57142857 0.57142857
|
|
0.5 0.75 0.625 0.375 ]
|
|
|
|
mean value: 0.6089285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.5625 0.73214286 0.86607143 0.59821429 0.72321429
|
|
0.67857143 0.875 0.66964286 0.61607143]
|
|
|
|
mean value: 0.7071428571428572
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.36363636 0.55555556 0.75 0.4 0.5
|
|
0.44444444 0.75 0.5 0.33333333]
|
|
|
|
mean value: 0.5152525252525253
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02253032 0.00990367 0.00890112 0.00878048 0.00946641 0.00981283
|
|
0.00976562 0.00911188 0.00904393 0.00897527]
|
|
|
|
mean value: 0.01062915325164795
|
|
|
|
key: score_time
|
|
value: [0.00994992 0.00903511 0.00864005 0.00946522 0.0089221 0.00871301
|
|
0.00903893 0.00893426 0.00859571 0.00905108]
|
|
|
|
mean value: 0.009034538269042968
|
|
|
|
key: test_mcc
|
|
value: [ 0.37796447 0.5 0.18898224 0.26189246 -0.19642857 0.47245559
|
|
0.46428571 0.46428571 0.33928571 0.34247476]
|
|
|
|
mean value: 0.32151980952229814
|
|
|
|
key: train_mcc
|
|
value: [0.47058824 0.4738791 0.4457507 0.53282182 0.48933032 0.45981668
|
|
0.43493568 0.51986449 0.50362319 0.50469525]
|
|
|
|
mean value: 0.48353054702816334
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.75 0.6 0.6 0.4 0.73333333
|
|
0.73333333 0.73333333 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6570833333333334
|
|
|
|
key: train_accuracy
|
|
value: [0.73529412 0.73529412 0.72262774 0.76642336 0.74452555 0.72992701
|
|
0.71532847 0.75912409 0.75182482 0.75182482]
|
|
|
|
mean value: 0.7412194074710176
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.75 0.5 0.66666667 0.4 0.66666667
|
|
0.75 0.75 0.66666667 0.73684211]
|
|
|
|
mean value: 0.6553508771929825
|
|
|
|
key: train_fscore
|
|
value: [0.73529412 0.75 0.73239437 0.76811594 0.75177305 0.73381295
|
|
0.73103448 0.76595745 0.75 0.75714286]
|
|
|
|
mean value: 0.7475525211868894
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.75 0.6 0.54545455 0.375 0.8
|
|
0.75 0.75 0.71428571 0.63636364]
|
|
|
|
mean value: 0.663538961038961
|
|
|
|
key: train_precision
|
|
value: [0.73529412 0.71052632 0.71232877 0.76811594 0.73611111 0.72857143
|
|
0.68831169 0.73972603 0.75 0.73611111]
|
|
|
|
mean value: 0.7305096509091406
|
|
|
|
key: test_recall
|
|
value: [0.625 0.75 0.42857143 0.85714286 0.42857143 0.57142857
|
|
0.75 0.75 0.625 0.875 ]
|
|
|
|
mean value: 0.6660714285714285
|
|
|
|
key: train_recall
|
|
value: [0.73529412 0.79411765 0.75362319 0.76811594 0.76811594 0.73913043
|
|
0.77941176 0.79411765 0.75 0.77941176]
|
|
|
|
mean value: 0.7661338448422848
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.75 0.58928571 0.61607143 0.40178571 0.72321429
|
|
0.73214286 0.73214286 0.66964286 0.65178571]
|
|
|
|
mean value: 0.6553571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [0.73529412 0.73529412 0.72239983 0.76641091 0.74435209 0.72985934
|
|
0.71579284 0.75937766 0.75181159 0.75202472]
|
|
|
|
mean value: 0.7412617220801364
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.6 0.33333333 0.5 0.25 0.5
|
|
0.6 0.6 0.5 0.58333333]
|
|
|
|
mean value: 0.49666666666666665
|
|
|
|
key: train_jcc
|
|
value: [0.58139535 0.6 0.57777778 0.62352941 0.60227273 0.57954545
|
|
0.57608696 0.62068966 0.6 0.6091954 ]
|
|
|
|
mean value: 0.5970492734190879
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01225185 0.01327944 0.01416063 0.01370931 0.01541018 0.01700521
|
|
0.01550269 0.01642966 0.01558161 0.01649284]
|
|
|
|
mean value: 0.014982342720031738
|
|
|
|
key: score_time
|
|
value: [0.00872374 0.01088095 0.01173258 0.01138139 0.0119009 0.01152515
|
|
0.01247931 0.01200342 0.01153135 0.01145458]
|
|
|
|
mean value: 0.011361336708068848
|
|
|
|
key: test_mcc
|
|
value: [0.37796447 0.48038446 0.875 0.37796447 0.19642857 0.6000992
|
|
0.87287156 0.49099025 0.6000992 0.66143783]
|
|
|
|
mean value: 0.5533240016901345
|
|
|
|
key: train_mcc
|
|
value: [0.82402205 0.64549722 0.95710706 0.82498207 0.87631485 1.
|
|
0.97080136 0.92944673 0.98550725 0.92944673]
|
|
|
|
mean value: 0.8943125328174313
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.6875 0.93333333 0.66666667 0.6 0.8
|
|
0.93333333 0.73333333 0.8 0.8 ]
|
|
|
|
mean value: 0.7641666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.90441176 0.79411765 0.97810219 0.90510949 0.93430657 1.
|
|
0.98540146 0.96350365 0.99270073 0.96350365]
|
|
|
|
mean value: 0.9421157148990983
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.54545455 0.93333333 0.70588235 0.57142857 0.76923077
|
|
0.94117647 0.71428571 0.82352941 0.76923077]
|
|
|
|
mean value: 0.7479434291198997
|
|
|
|
key: train_fscore
|
|
value: [0.91275168 0.74074074 0.9787234 0.91390728 0.93023256 1.
|
|
0.98529412 0.96183206 0.99270073 0.96183206]
|
|
|
|
mean value: 0.9378014635467626
|
|
|
|
key: test_precision
|
|
value: [0.66666667 1. 0.875 0.6 0.57142857 0.83333333
|
|
0.88888889 0.83333333 0.77777778 1. ]
|
|
|
|
mean value: 0.8046428571428571
|
|
|
|
key: train_precision
|
|
value: [0.83950617 1. 0.95833333 0.84146341 1. 1.
|
|
0.98529412 1. 0.98550725 1. ]
|
|
|
|
mean value: 0.9610104284830856
|
|
|
|
key: test_recall
|
|
value: [0.75 0.375 1. 0.85714286 0.57142857 0.71428571
|
|
1. 0.625 0.875 0.625 ]
|
|
|
|
mean value: 0.7392857142857143
|
|
|
|
key: train_recall
|
|
value: [1. 0.58823529 1. 1. 0.86956522 1.
|
|
0.98529412 0.92647059 1. 0.92647059]
|
|
|
|
mean value: 0.9296035805626599
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.6875 0.9375 0.67857143 0.59821429 0.79464286
|
|
0.92857143 0.74107143 0.79464286 0.8125 ]
|
|
|
|
mean value: 0.7660714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [0.90441176 0.79411765 0.97794118 0.90441176 0.93478261 1.
|
|
0.98540068 0.96323529 0.99275362 0.96323529]
|
|
|
|
mean value: 0.9420289855072465
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.375 0.875 0.54545455 0.4 0.625
|
|
0.88888889 0.55555556 0.7 0.625 ]
|
|
|
|
mean value: 0.6135353535353535
|
|
|
|
key: train_jcc
|
|
value: [0.83950617 0.58823529 0.95833333 0.84146341 0.86956522 1.
|
|
0.97101449 0.92647059 0.98550725 0.92647059]
|
|
|
|
mean value: 0.890656634791696
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01423502 0.01331472 0.01325274 0.01306558 0.01300478 0.01313019
|
|
0.01375628 0.01323366 0.01340389 0.01344252]
|
|
|
|
mean value: 0.013383936882019044
|
|
|
|
key: score_time
|
|
value: [0.01171899 0.01145053 0.0114677 0.01151156 0.01143575 0.01140547
|
|
0.01144218 0.01145554 0.01145244 0.01148009]
|
|
|
|
mean value: 0.011482024192810058
|
|
|
|
key: test_mcc
|
|
value: [0.5 0.57735027 0.36689969 0.49099025 0.02620712 0.53452248
|
|
0.875 0.46770717 0. 0.46770717]
|
|
|
|
mean value: 0.43063841665096625
|
|
|
|
key: train_mcc
|
|
value: [1. 0.76894131 0.46423351 0.85721269 0.729047 0.80073303
|
|
0.85977656 0.68130314 0.64876322 0.71619009]
|
|
|
|
mean value: 0.7526200558737186
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.75 0.6 0.73333333 0.53333333 0.73333333
|
|
0.93333333 0.66666667 0.53333333 0.66666667]
|
|
|
|
mean value: 0.69
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.875 0.67883212 0.9270073 0.84671533 0.89051095
|
|
0.9270073 0.81751825 0.79562044 0.83941606]
|
|
|
|
mean value: 0.8597627737226278
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.66666667 0.7 0.75 0.22222222 0.6
|
|
0.93333333 0.54545455 0.69565217 0.54545455]
|
|
|
|
mean value: 0.6408783487044357
|
|
|
|
key: train_fscore
|
|
value: [1. 0.85950413 0.75824176 0.93055556 0.82051282 0.87804878
|
|
0.921875 0.77477477 0.82926829 0.80701754]
|
|
|
|
mean value: 0.8579798658346695
|
|
|
|
key: test_precision
|
|
value: [0.75 1. 0.53846154 0.66666667 0.5 1.
|
|
1. 1. 0.53333333 1. ]
|
|
|
|
mean value: 0.7988461538461539
|
|
|
|
key: train_precision
|
|
value: [1. 0.98113208 0.61061947 0.89333333 1. 1.
|
|
0.98333333 1. 0.70833333 1. ]
|
|
|
|
mean value: 0.9176751544498247
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 1. 0.85714286 0.14285714 0.42857143
|
|
0.875 0.375 1. 0.375 ]
|
|
|
|
mean value: 0.6303571428571428
|
|
|
|
key: train_recall
|
|
value: [1. 0.76470588 1. 0.97101449 0.69565217 0.7826087
|
|
0.86764706 0.63235294 1. 0.67647059]
|
|
|
|
mean value: 0.8390451832907075
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.75 0.625 0.74107143 0.50892857 0.71428571
|
|
0.9375 0.6875 0.5 0.6875 ]
|
|
|
|
mean value: 0.6901785714285714
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.875 0.67647059 0.92668372 0.84782609 0.89130435
|
|
0.92657715 0.81617647 0.79710145 0.83823529]
|
|
|
|
mean value: 0.8595375106564365
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.5 0.53846154 0.6 0.125 0.42857143
|
|
0.875 0.375 0.53333333 0.375 ]
|
|
|
|
mean value: 0.49503663003663
|
|
|
|
key: train_jcc
|
|
value: [1. 0.75362319 0.61061947 0.87012987 0.69565217 0.7826087
|
|
0.85507246 0.63235294 0.70833333 0.67647059]
|
|
|
|
mean value: 0.7584862723640647
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.13326478 0.10330558 0.10409927 0.10355568 0.10456991 0.10327578
|
|
0.10341406 0.10458755 0.10527539 0.10435581]
|
|
|
|
mean value: 0.10697038173675537
|
|
|
|
key: score_time
|
|
value: [0.01640439 0.01446915 0.01444077 0.01433468 0.01447153 0.01458693
|
|
0.01456904 0.01437569 0.01455975 0.01480579]
|
|
|
|
mean value: 0.014701771736145019
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.8819171 0.66143783 0.76376262 0.73214286 0.6000992
|
|
1. 0.66143783 1. 0.87287156]
|
|
|
|
mean value: 0.7803609779630983
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.9375 0.8 0.86666667 0.86666667 0.8
|
|
1. 0.8 1. 0.93333333]
|
|
|
|
mean value: 0.8816666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.94117647 0.82352941 0.875 0.85714286 0.76923077
|
|
1. 0.76923077 1. 0.94117647]
|
|
|
|
mean value: 0.8776486748545572
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.88888889 0.7 0.77777778 0.85714286 0.83333333
|
|
1. 1. 1. 0.88888889]
|
|
|
|
mean value: 0.8803174603174603
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 1. 1. 0.85714286 0.71428571
|
|
1. 0.625 1. 1. ]
|
|
|
|
mean value: 0.8946428571428572
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.9375 0.8125 0.875 0.86607143 0.79464286
|
|
1. 0.8125 1. 0.92857143]
|
|
|
|
mean value: 0.8839285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.88888889 0.7 0.77777778 0.75 0.625
|
|
1. 0.625 1. 0.88888889]
|
|
|
|
mean value: 0.7922222222222222
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.8
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03533721 0.03343058 0.03268671 0.03509784 0.03658581 0.04511404
|
|
0.04187083 0.03938723 0.05203009 0.05188322]
|
|
|
|
mean value: 0.0403423547744751
|
|
|
|
key: score_time
|
|
value: [0.01689053 0.02268291 0.02399755 0.01774144 0.03066397 0.08517981
|
|
0.02288914 0.02845573 0.03006124 0.02338696]
|
|
|
|
mean value: 0.030194926261901855
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.77459667 0.56407607 0.6000992 0.87287156 0.64465837
|
|
0.87287156 0.875 0.875 0.87287156]
|
|
|
|
mean value: 0.7581985784609153
|
|
|
|
key: train_mcc
|
|
value: [0.98540068 0.98540068 1. 0.98550725 0.98550418 1.
|
|
0.98550418 1. 0.97120941 1. ]
|
|
|
|
mean value: 0.9898526369281623
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.875 0.73333333 0.8 0.93333333 0.8
|
|
0.93333333 0.93333333 0.93333333 0.93333333]
|
|
|
|
mean value: 0.86875
|
|
|
|
key: train_accuracy
|
|
value: [0.99264706 0.99264706 1. 0.99270073 0.99270073 1.
|
|
0.99270073 1. 0.98540146 1. ]
|
|
|
|
mean value: 0.9948797767282096
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.88888889 0.77777778 0.76923077 0.92307692 0.72727273
|
|
0.94117647 0.93333333 0.93333333 0.94117647]
|
|
|
|
mean value: 0.8635266694090223
|
|
|
|
key: train_fscore
|
|
value: [0.99259259 0.99270073 1. 0.99270073 0.99280576 1.
|
|
0.99259259 1. 0.98507463 1. ]
|
|
|
|
mean value: 0.9948467027300555
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.8 0.63636364 0.83333333 1. 1.
|
|
0.88888889 1. 1. 0.88888889]
|
|
|
|
mean value: 0.8904617604617604
|
|
|
|
key: train_precision
|
|
value: [1. 0.98550725 1. 1. 0.98571429 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9971221532091097
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 1. 0.71428571 0.85714286 0.57142857
|
|
1. 0.875 0.875 1. ]
|
|
|
|
mean value: 0.8642857142857143
|
|
|
|
key: train_recall
|
|
value: [0.98529412 1. 1. 0.98550725 1. 1.
|
|
0.98529412 1. 0.97058824 1. ]
|
|
|
|
mean value: 0.9926683716965047
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.875 0.75 0.79464286 0.92857143 0.78571429
|
|
0.92857143 0.9375 0.9375 0.92857143]
|
|
|
|
mean value: 0.8678571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [0.99264706 0.99264706 1. 0.99275362 0.99264706 1.
|
|
0.99264706 1. 0.98529412 1. ]
|
|
|
|
mean value: 0.9948635976129583
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.8 0.63636364 0.625 0.85714286 0.57142857
|
|
0.88888889 0.875 0.875 0.88888889]
|
|
|
|
mean value: 0.7684379509379509
|
|
|
|
key: train_jcc
|
|
value: [0.98529412 0.98550725 1. 0.98550725 0.98571429 1.
|
|
0.98529412 1. 0.97058824 1. ]
|
|
|
|
mean value: 0.9897905249056145
|
|
|
|
MCC on Blind test: 0.76
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03389168 0.04431558 0.04505873 0.05247664 0.04586053 0.04464412
|
|
0.04567122 0.04597712 0.04365826 0.04524136]
|
|
|
|
mean value: 0.04467952251434326
|
|
|
|
key: score_time
|
|
value: [0.0166564 0.0202353 0.02379227 0.02043128 0.02301693 0.02330208
|
|
0.02210164 0.02100325 0.02092814 0.02355409]
|
|
|
|
mean value: 0.021502137184143066
|
|
|
|
key: test_mcc
|
|
value: [ 0.28867513 0.37796447 0.46428571 0.19642857 -0.21821789 0.32732684
|
|
0.56407607 0.09449112 0.33928571 0.56407607]
|
|
|
|
mean value: 0.2998391820609876
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.98550725 0.98550725 1. 1.
|
|
0.98550418 1. 1. 1. ]
|
|
|
|
mean value: 0.9956518672042052
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.6875 0.73333333 0.6 0.4 0.66666667
|
|
0.73333333 0.53333333 0.66666667 0.73333333]
|
|
|
|
mean value: 0.6379166666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.99270073 0.99270073 1. 1.
|
|
0.99270073 1. 1. 1. ]
|
|
|
|
mean value: 0.9978102189781022
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.66666667 0.71428571 0.57142857 0.30769231 0.61538462
|
|
0.66666667 0.46153846 0.66666667 0.66666667]
|
|
|
|
mean value: 0.5836996336996336
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.99270073 0.99270073 1. 1.
|
|
0.99259259 1. 1. 1. ]
|
|
|
|
mean value: 0.9977994052446607
|
|
|
|
key: test_precision
|
|
value: [0.75 0.71428571 0.71428571 0.57142857 0.33333333 0.66666667
|
|
1. 0.6 0.71428571 1. ]
|
|
|
|
mean value: 0.7064285714285714
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.375 0.625 0.71428571 0.57142857 0.28571429 0.57142857
|
|
0.5 0.375 0.625 0.5 ]
|
|
|
|
mean value: 0.5142857142857142
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.98550725 0.98550725 1. 1.
|
|
0.98529412 1. 1. 1. ]
|
|
|
|
mean value: 0.9956308610400683
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.6875 0.73214286 0.59821429 0.39285714 0.66071429
|
|
0.75 0.54464286 0.66964286 0.75 ]
|
|
|
|
mean value: 0.6410714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.99275362 0.99275362 1. 1.
|
|
0.99264706 1. 1. 1. ]
|
|
|
|
mean value: 0.9978154305200341
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.5 0.55555556 0.4 0.18181818 0.44444444
|
|
0.5 0.3 0.5 0.5 ]
|
|
|
|
mean value: 0.4215151515151515
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.98550725 0.98550725 1. 1.
|
|
0.98529412 1. 1. 1. ]
|
|
|
|
mean value: 0.9956308610400683
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.32355213 0.3026855 0.29724026 0.29959607 0.31641984 0.30995607
|
|
0.30303383 0.30751705 0.30891609 0.30699992]
|
|
|
|
mean value: 0.30759167671203613
|
|
|
|
key: score_time
|
|
value: [0.0100224 0.00898337 0.00908351 0.00956535 0.00903773 0.00964832
|
|
0.008991 0.00919938 0.00907493 0.00935054]
|
|
|
|
mean value: 0.009295654296875
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.77459667 0.56407607 0.32732684 0.6000992 0.87287156
|
|
0.87287156 1. 1. 0.87287156]
|
|
|
|
mean value: 0.7514654248742838
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.875 0.73333333 0.66666667 0.8 0.93333333
|
|
0.93333333 1. 1. 0.93333333]
|
|
|
|
mean value: 0.86875
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.88888889 0.77777778 0.61538462 0.76923077 0.92307692
|
|
0.94117647 1. 1. 0.94117647]
|
|
|
|
mean value: 0.8656711915535444
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.8 0.63636364 0.66666667 0.83333333 1.
|
|
0.88888889 1. 1. 0.88888889]
|
|
|
|
mean value: 0.8571284271284271
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 1. 0.57142857 0.71428571 0.85714286
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.8892857142857142
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.875 0.75 0.66071429 0.79464286 0.92857143
|
|
0.92857143 1. 1. 0.92857143]
|
|
|
|
mean value: 0.8678571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.8 0.63636364 0.44444444 0.625 0.85714286
|
|
0.88888889 1. 1. 0.88888889]
|
|
|
|
mean value: 0.7807395382395382
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.83
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01618242 0.01694918 0.0171454 0.0173583 0.01694083 0.01708293
|
|
0.02457333 0.0169518 0.01739001 0.02177453]
|
|
|
|
mean value: 0.018234872817993165
|
|
|
|
key: score_time
|
|
value: [0.01190066 0.01173997 0.01192951 0.01214218 0.01183653 0.01177335
|
|
0.0120542 0.0117383 0.01184988 0.01208735]
|
|
|
|
mean value: 0.011905193328857422
|
|
|
|
key: test_mcc
|
|
value: [ 0.51639778 0.51639778 0.47245559 0.46428571 0.34247476 -0.26189246
|
|
0.66143783 0.46770717 0.33928571 0.21821789]
|
|
|
|
mean value: 0.37367677665730276
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.75 0.73333333 0.73333333 0.66666667 0.4
|
|
0.8 0.66666667 0.66666667 0.6 ]
|
|
|
|
mean value: 0.6766666666666666
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.71428571 0.66666667 0.71428571 0.54545455 0.18181818
|
|
0.76923077 0.54545455 0.66666667 0.57142857]
|
|
|
|
mean value: 0.608957708957709
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.83333333 0.8 0.71428571 0.75 0.25
|
|
1. 1. 0.71428571 0.66666667]
|
|
|
|
mean value: 0.7561904761904762
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.625 0.625 0.57142857 0.71428571 0.42857143 0.14285714
|
|
0.625 0.375 0.625 0.5 ]
|
|
|
|
mean value: 0.5232142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.75 0.72321429 0.73214286 0.65178571 0.38392857
|
|
0.8125 0.6875 0.66964286 0.60714286]
|
|
|
|
mean value: 0.6767857142857143
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.55555556 0.5 0.55555556 0.375 0.1
|
|
0.625 0.375 0.5 0.4 ]
|
|
|
|
mean value: 0.45416666666666666
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03477263 0.01746392 0.01311684 0.01299357 0.01324058 0.01971269
|
|
0.03290296 0.03135729 0.0329752 0.03273535]
|
|
|
|
mean value: 0.024127101898193358
|
|
|
|
key: score_time
|
|
value: [0.02140951 0.01165438 0.01148176 0.01163721 0.01150393 0.01427007
|
|
0.02292275 0.01153278 0.0225656 0.01324725]
|
|
|
|
mean value: 0.015222525596618653
|
|
|
|
key: test_mcc
|
|
value: [0.5 0.40451992 0.73214286 0.60714286 0.6000992 0.6000992
|
|
1. 0.60714286 0.6000992 0.73214286]
|
|
|
|
mean value: 0.6383388940496311
|
|
|
|
key: train_mcc
|
|
value: [1. 0.97058824 1. 1. 0.97080136 0.98550418
|
|
0.97080136 0.97120941 0.98550418 0.97120941]
|
|
|
|
mean value: 0.9825618145575302
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.6875 0.86666667 0.8 0.8 0.8
|
|
1. 0.8 0.8 0.86666667]
|
|
|
|
mean value: 0.8170833333333334
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98529412 1. 1. 0.98540146 0.99270073
|
|
0.98540146 0.98540146 0.99270073 0.98540146]
|
|
|
|
mean value: 0.9912301416917132
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.61538462 0.85714286 0.8 0.76923077 0.76923077
|
|
1. 0.8 0.82352941 0.875 ]
|
|
|
|
mean value: 0.8059518422753716
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98529412 1. 1. 0.98550725 0.99280576
|
|
0.98529412 0.98507463 0.99259259 0.98507463]
|
|
|
|
mean value: 0.9911643083390549
|
|
|
|
key: test_precision
|
|
value: [0.75 0.8 0.85714286 0.75 0.83333333 0.83333333
|
|
1. 0.85714286 0.77777778 0.875 ]
|
|
|
|
mean value: 0.8333730158730159
|
|
|
|
key: train_precision
|
|
value: [1. 0.98529412 1. 1. 0.98550725 0.98571429
|
|
0.98529412 1. 1. 1. ]
|
|
|
|
mean value: 0.9941809767385215
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.85714286 0.85714286 0.71428571 0.71428571
|
|
1. 0.75 0.875 0.875 ]
|
|
|
|
mean value: 0.7892857142857143
|
|
|
|
key: train_recall
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_7030.py:136: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_7030.py:139: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[1. 0.98529412 1. 1. 0.98550725 1.
|
|
0.98529412 0.97058824 0.98529412 0.97058824]
|
|
|
|
mean value: 0.9882566069906223
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.6875 0.86607143 0.80357143 0.79464286 0.79464286
|
|
1. 0.80357143 0.79464286 0.86607143]
|
|
|
|
mean value: 0.8160714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98529412 1. 1. 0.98540068 0.99264706
|
|
0.98540068 0.98529412 0.99264706 0.98529412]
|
|
|
|
mean value: 0.9911977834612106
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.44444444 0.75 0.66666667 0.625 0.625
|
|
1. 0.66666667 0.7 0.77777778]
|
|
|
|
mean value: 0.6855555555555556
|
|
|
|
key: train_jcc
|
|
value: [1. 0.97101449 1. 1. 0.97142857 0.98571429
|
|
0.97101449 0.97058824 0.98529412 0.97058824]
|
|
|
|
mean value: 0.9825642430885397
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.19766355 0.1977067 0.26526546 0.23320699 0.20369792 0.25036335
|
|
0.25942421 0.25369072 0.20481992 0.21358204]
|
|
|
|
mean value: 0.2279420852661133
|
|
|
|
key: score_time
|
|
value: [0.0231607 0.01829505 0.01569152 0.02334714 0.01330042 0.02818418
|
|
0.01925683 0.03158617 0.0225687 0.0191071 ]
|
|
|
|
mean value: 0.021449780464172362
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.5 0.6000992 0.60714286 0.6000992 0.6000992
|
|
1. 0.73214286 0.60714286 0.73214286]
|
|
|
|
mean value: 0.6608809811367078
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 0.97080136 0.98550418
|
|
0.97080136 1. 1. 0.97120941]
|
|
|
|
mean value: 0.9898316319164463
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.75 0.8 0.8 0.8 0.8
|
|
1. 0.86666667 0.8 0.86666667]
|
|
|
|
mean value: 0.8295833333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 0.98540146 0.99270073
|
|
0.98540146 1. 1. 0.98540146]
|
|
|
|
mean value: 0.9948905109489051
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.75 0.76923077 0.8 0.76923077 0.76923077
|
|
1. 0.875 0.8 0.875 ]
|
|
|
|
mean value: 0.8207692307692307
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 0.98550725 0.99280576
|
|
0.98529412 1. 1. 0.98507463]
|
|
|
|
mean value: 0.9948681746285226
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.75 0.83333333 0.75 0.83333333 0.83333333
|
|
1. 0.875 0.85714286 0.875 ]
|
|
|
|
mean value: 0.8464285714285714
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 0.98550725 0.98571429
|
|
0.98529412 1. 1. 1. ]
|
|
|
|
mean value: 0.9956515649738156
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.71428571 0.85714286 0.71428571 0.71428571
|
|
1. 0.875 0.75 0.875 ]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 0.98550725 1.
|
|
0.98529412 1. 1. 0.97058824]
|
|
|
|
mean value: 0.9941389599317988
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.75 0.79464286 0.80357143 0.79464286 0.79464286
|
|
1. 0.86607143 0.80357143 0.86607143]
|
|
|
|
mean value: 0.8285714285714285
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 0.98540068 0.99264706
|
|
0.98540068 1. 1. 0.98529412]
|
|
|
|
mean value: 0.9948742540494458
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.6 0.625 0.66666667 0.625 0.625
|
|
1. 0.77777778 0.66666667 0.77777778]
|
|
|
|
mean value: 0.7030555555555555
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 0.97142857 0.98571429
|
|
0.97101449 1. 1. 0.97058824]
|
|
|
|
mean value: 0.9898745585190598
|
|
|
|
MCC on Blind test: 0.53
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04320121 0.02958608 0.02821326 0.02681899 0.02457881 0.03055382
|
|
0.0259335 0.02716541 0.02808213 0.04537916]
|
|
|
|
mean value: 0.03095123767852783
|
|
|
|
key: score_time
|
|
value: [0.01172972 0.01138425 0.01140666 0.01142383 0.01142263 0.01410961
|
|
0.01172948 0.01139331 0.01142216 0.01186991]
|
|
|
|
mean value: 0.011789155006408692
|
|
|
|
key: test_mcc
|
|
value: [0.5 0.51639778 0.6000992 0.49099025 0.34247476 0.73214286
|
|
0.87287156 0.32732684 0.75592895 0.73214286]
|
|
|
|
mean value: 0.5870375046987197
|
|
|
|
key: train_mcc
|
|
value: [0.83832595 0.8722811 0.85739162 0.85400682 0.90025835 0.85739162
|
|
0.87308606 0.81433714 0.85434012 0.8251228 ]
|
|
|
|
mean value: 0.8546541589888493
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.75 0.8 0.73333333 0.66666667 0.86666667
|
|
0.93333333 0.66666667 0.86666667 0.86666667]
|
|
|
|
mean value: 0.79
|
|
|
|
key: train_accuracy
|
|
value: [0.91911765 0.93382353 0.9270073 0.9270073 0.94890511 0.9270073
|
|
0.93430657 0.90510949 0.9270073 0.91240876]
|
|
|
|
mean value: 0.9261700300558179
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.71428571 0.76923077 0.75 0.54545455 0.85714286
|
|
0.94117647 0.70588235 0.88888889 0.875 ]
|
|
|
|
mean value: 0.7797061598532187
|
|
|
|
key: train_fscore
|
|
value: [0.91970803 0.93023256 0.92424242 0.92753623 0.94736842 0.92424242
|
|
0.93023256 0.89922481 0.92537313 0.91044776]
|
|
|
|
mean value: 0.9238608348621626
|
|
|
|
key: test_precision
|
|
value: [0.75 0.83333333 0.83333333 0.66666667 0.75 0.85714286
|
|
0.88888889 0.66666667 0.8 0.875 ]
|
|
|
|
mean value: 0.7921031746031746
|
|
|
|
key: train_precision
|
|
value: [0.91304348 0.98360656 0.96825397 0.92753623 0.984375 0.96825397
|
|
0.98360656 0.95081967 0.93939394 0.92424242]
|
|
|
|
mean value: 0.9543131797174473
|
|
|
|
key: test_recall
|
|
value: [0.75 0.625 0.71428571 0.85714286 0.42857143 0.85714286
|
|
1. 0.75 1. 0.875 ]
|
|
|
|
mean value: 0.7857142857142857
|
|
|
|
key: train_recall
|
|
value: [0.92647059 0.88235294 0.88405797 0.92753623 0.91304348 0.88405797
|
|
0.88235294 0.85294118 0.91176471 0.89705882]
|
|
|
|
mean value: 0.8961636828644501
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.75 0.79464286 0.74107143 0.65178571 0.86607143
|
|
0.92857143 0.66071429 0.85714286 0.86607143]
|
|
|
|
mean value: 0.7866071428571428
|
|
|
|
key: train_roc_auc
|
|
value: [0.91911765 0.93382353 0.9273231 0.92700341 0.9491688 0.9273231
|
|
0.93393009 0.90473146 0.92689685 0.91229753]
|
|
|
|
mean value: 0.9261615515771526
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.55555556 0.625 0.6 0.375 0.75
|
|
0.88888889 0.54545455 0.8 0.77777778]
|
|
|
|
mean value: 0.6517676767676768
|
|
|
|
key: train_jcc
|
|
value: [0.85135135 0.86956522 0.85915493 0.86486486 0.9 0.85915493
|
|
0.86956522 0.81690141 0.86111111 0.83561644]
|
|
|
|
mean value: 0.8587285468071735
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.72472978 0.75495839 0.73936343 0.75687647 0.84142971 0.64613605
|
|
0.65643215 0.76040888 0.65476346 0.65726519]
|
|
|
|
mean value: 0.7192363500595093
|
|
|
|
key: score_time
|
|
value: [0.01454163 0.02372313 0.01549673 0.01228714 0.01491833 0.01502371
|
|
0.01498818 0.01491761 0.01482344 0.014889 ]
|
|
|
|
mean value: 0.01556088924407959
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.8819171 0.87287156 0.60714286 0.47245559 0.6000992
|
|
0.87287156 0.6000992 0.6000992 0.875 ]
|
|
|
|
mean value: 0.7012497056776176
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.9375 0.93333333 0.8 0.73333333 0.8
|
|
0.93333333 0.8 0.8 0.93333333]
|
|
|
|
mean value: 0.8483333333333334
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.93333333 0.92307692 0.8 0.66666667 0.76923077
|
|
0.94117647 0.82352941 0.82352941 0.93333333]
|
|
|
|
mean value: 0.8413876319758673
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85714286 1. 1. 0.75 0.8 0.83333333
|
|
0.88888889 0.77777778 0.77777778 1. ]
|
|
|
|
mean value: 0.8684920634920635
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.875 0.85714286 0.85714286 0.57142857 0.71428571
|
|
1. 0.875 0.875 0.875 ]
|
|
|
|
mean value: 0.825
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.9375 0.92857143 0.80357143 0.72321429 0.79464286
|
|
0.92857143 0.79464286 0.79464286 0.9375 ]
|
|
|
|
mean value: 0.8455357142857143
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.875 0.85714286 0.66666667 0.5 0.625
|
|
0.88888889 0.7 0.7 0.875 ]
|
|
|
|
mean value: 0.7354365079365079
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01848245 0.0101335 0.00885034 0.00875592 0.00899553 0.00864291
|
|
0.00851798 0.00969911 0.00897145 0.0086875 ]
|
|
|
|
mean value: 0.009973669052124023
|
|
|
|
key: score_time
|
|
value: [0.01245618 0.00936365 0.00951314 0.00900316 0.00852799 0.0085516
|
|
0.00870609 0.00925064 0.00872588 0.00880647]
|
|
|
|
mean value: 0.009290480613708496
|
|
|
|
key: test_mcc
|
|
value: [ 0.40451992 0.13483997 0.37796447 -0.02620712 0.49099025 -0.32732684
|
|
0.64465837 0.02620712 0.02620712 0.47245559]
|
|
|
|
mean value: 0.22243088640567016
|
|
|
|
key: train_mcc
|
|
value: [0.53653217 0.4793114 0.49914682 0.46104278 0.6120283 0.34901614
|
|
0.39366754 0.46000512 0.44762296 0.45748511]
|
|
|
|
mean value: 0.46958583430166345
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.5625 0.66666667 0.46666667 0.73333333 0.33333333
|
|
0.8 0.53333333 0.53333333 0.73333333]
|
|
|
|
mean value: 0.605
|
|
|
|
key: train_accuracy
|
|
value: [0.74264706 0.72794118 0.71532847 0.7080292 0.79562044 0.6350365
|
|
0.67153285 0.70072993 0.69343066 0.7080292 ]
|
|
|
|
mean value: 0.709832546157149
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.63157895 0.70588235 0.6 0.75 0.375
|
|
0.84210526 0.66666667 0.66666667 0.77777778]
|
|
|
|
mean value: 0.6752519779841761
|
|
|
|
key: train_fscore
|
|
value: [0.78787879 0.76433121 0.77456647 0.76190476 0.82051282 0.72527473
|
|
0.73372781 0.75739645 0.75294118 0.75609756]
|
|
|
|
mean value: 0.7634631777551846
|
|
|
|
key: test_precision
|
|
value: [0.63636364 0.54545455 0.6 0.46153846 0.66666667 0.33333333
|
|
0.72727273 0.53846154 0.53846154 0.7 ]
|
|
|
|
mean value: 0.5747552447552448
|
|
|
|
key: train_precision
|
|
value: [0.67010309 0.6741573 0.64423077 0.64646465 0.73563218 0.5840708
|
|
0.61386139 0.63366337 0.62745098 0.64583333]
|
|
|
|
mean value: 0.6475467858418669
|
|
|
|
key: test_recall
|
|
value: [0.875 0.75 0.85714286 0.85714286 0.85714286 0.42857143
|
|
1. 0.875 0.875 0.875 ]
|
|
|
|
mean value: 0.825
|
|
|
|
key: train_recall
|
|
value: [0.95588235 0.88235294 0.97101449 0.92753623 0.92753623 0.95652174
|
|
0.91176471 0.94117647 0.94117647 0.91176471]
|
|
|
|
mean value: 0.9326726342710997
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.5625 0.67857143 0.49107143 0.74107143 0.33928571
|
|
0.78571429 0.50892857 0.50892857 0.72321429]
|
|
|
|
mean value: 0.6026785714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.74264706 0.72794118 0.71344842 0.70641517 0.79465047 0.63267263
|
|
0.67327366 0.70247229 0.69522592 0.70950554]
|
|
|
|
mean value: 0.7098252344416027
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.46153846 0.54545455 0.42857143 0.6 0.23076923
|
|
0.72727273 0.5 0.5 0.63636364]
|
|
|
|
mean value: 0.5213303363303363
|
|
|
|
key: train_jcc
|
|
value: [0.65 0.6185567 0.63207547 0.61538462 0.69565217 0.56896552
|
|
0.57943925 0.60952381 0.60377358 0.60784314]
|
|
|
|
mean value: 0.61812142632889
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00905561 0.00891018 0.00886559 0.00871682 0.00850534 0.00852299
|
|
0.00978875 0.00960135 0.0091455 0.00922894]
|
|
|
|
mean value: 0.009034109115600587
|
|
|
|
key: score_time
|
|
value: [0.00863028 0.00875092 0.00873446 0.00840735 0.0085628 0.00858498
|
|
0.00982428 0.00897503 0.00913978 0.00879216]
|
|
|
|
mean value: 0.008840203285217285
|
|
|
|
key: test_mcc
|
|
value: [ 0.12598816 0.25819889 -0.26189246 -0.04029115 0.20044593 0.04029115
|
|
0.09449112 0.37796447 -0.02620712 -0.02620712]
|
|
|
|
mean value: 0.07427818649684113
|
|
|
|
key: train_mcc
|
|
value: [0.37721677 0.34459104 0.49711106 0.44876452 0.42884415 0.35028846
|
|
0.33060837 0.4337651 0.45582775 0.35292142]
|
|
|
|
mean value: 0.4019938637561584
|
|
|
|
key: test_accuracy
|
|
value: [0.5625 0.625 0.4 0.46666667 0.6 0.53333333
|
|
0.53333333 0.66666667 0.46666667 0.46666667]
|
|
|
|
mean value: 0.5320833333333334
|
|
|
|
key: train_accuracy
|
|
value: [0.67647059 0.66911765 0.73722628 0.72262774 0.7080292 0.67153285
|
|
0.64963504 0.7080292 0.71532847 0.67153285]
|
|
|
|
mean value: 0.6929529841133534
|
|
|
|
key: test_fscore
|
|
value: [0.53333333 0.57142857 0.18181818 0.55555556 0.4 0.36363636
|
|
0.46153846 0.61538462 0.2 0.2 ]
|
|
|
|
mean value: 0.4082695082695083
|
|
|
|
key: train_fscore
|
|
value: [0.60714286 0.63414634 0.69491525 0.70769231 0.67213115 0.64
|
|
0.54716981 0.65517241 0.65486726 0.62184874]
|
|
|
|
mean value: 0.6435086129323676
|
|
|
|
key: test_precision
|
|
value: [0.57142857 0.66666667 0.25 0.45454545 0.66666667 0.5
|
|
0.6 0.8 0.5 0.5 ]
|
|
|
|
mean value: 0.5509307359307359
|
|
|
|
key: train_precision
|
|
value: [0.77272727 0.70909091 0.83673469 0.75409836 0.77358491 0.71428571
|
|
0.76315789 0.79166667 0.82222222 0.7254902 ]
|
|
|
|
mean value: 0.7663058836001725
|
|
|
|
key: test_recall
|
|
value: [0.5 0.5 0.14285714 0.71428571 0.28571429 0.28571429
|
|
0.375 0.5 0.125 0.125 ]
|
|
|
|
mean value: 0.35535714285714287
|
|
|
|
key: train_recall
|
|
value: [0.5 0.57352941 0.5942029 0.66666667 0.5942029 0.57971014
|
|
0.42647059 0.55882353 0.54411765 0.54411765]
|
|
|
|
mean value: 0.5581841432225064
|
|
|
|
key: test_roc_auc
|
|
value: [0.5625 0.625 0.38392857 0.48214286 0.58035714 0.51785714
|
|
0.54464286 0.67857143 0.49107143 0.49107143]
|
|
|
|
mean value: 0.5357142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.67647059 0.66911765 0.73827792 0.72303922 0.70886616 0.67220801
|
|
0.6480179 0.706948 0.71408781 0.67060955]
|
|
|
|
mean value: 0.6927642796248934
|
|
|
|
key: test_jcc
|
|
value: [0.36363636 0.4 0.1 0.38461538 0.25 0.22222222
|
|
0.3 0.44444444 0.11111111 0.11111111]
|
|
|
|
mean value: 0.2687140637140637
|
|
|
|
key: train_jcc
|
|
value: [0.43589744 0.46428571 0.53246753 0.54761905 0.50617284 0.47058824
|
|
0.37662338 0.48717949 0.48684211 0.45121951]
|
|
|
|
mean value: 0.47588952863311645
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00864506 0.00828457 0.00928378 0.00875235 0.0098598 0.00966191
|
|
0.00941873 0.00864959 0.00838876 0.00964999]
|
|
|
|
mean value: 0.009059453010559082
|
|
|
|
key: score_time
|
|
value: [0.01497006 0.01048112 0.00997782 0.01029468 0.01638317 0.01446795
|
|
0.01556301 0.01014423 0.00980687 0.01054168]
|
|
|
|
mean value: 0.012263059616088867
|
|
|
|
key: test_mcc
|
|
value: [ 0.25819889 -0.25 -0.18898224 -0.32732684 -0.19642857 -0.19642857
|
|
0.07142857 -0.19642857 0.07142857 -0.07142857]
|
|
|
|
mean value: -0.1025967324968584
|
|
|
|
key: train_mcc
|
|
value: [0.48788604 0.44290395 0.37233589 0.41800514 0.37998495 0.41800514
|
|
0.38048979 0.45151662 0.47592003 0.4457507 ]
|
|
|
|
mean value: 0.4272798246395313
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.375 0.4 0.33333333 0.4 0.4
|
|
0.53333333 0.4 0.53333333 0.46666667]
|
|
|
|
mean value: 0.44666666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.74264706 0.72058824 0.68613139 0.7080292 0.68613139 0.7080292
|
|
0.68613139 0.72262774 0.73722628 0.72262774]
|
|
|
|
mean value: 0.7120169600686991
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.375 0.47058824 0.375 0.4 0.4
|
|
0.53333333 0.4 0.53333333 0.5 ]
|
|
|
|
mean value: 0.45586834733893555
|
|
|
|
key: train_fscore
|
|
value: [0.72868217 0.70769231 0.68613139 0.6969697 0.656 0.6969697
|
|
0.6446281 0.69354839 0.72307692 0.71212121]
|
|
|
|
mean value: 0.6945819880504114
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.375 0.4 0.33333333 0.375 0.375
|
|
0.57142857 0.42857143 0.57142857 0.5 ]
|
|
|
|
mean value: 0.45964285714285713
|
|
|
|
key: train_precision
|
|
value: [0.7704918 0.74193548 0.69117647 0.73015873 0.73214286 0.73015873
|
|
0.73584906 0.76785714 0.75806452 0.734375 ]
|
|
|
|
mean value: 0.7392209790788158
|
|
|
|
key: test_recall
|
|
value: [0.5 0.375 0.57142857 0.42857143 0.42857143 0.42857143
|
|
0.5 0.375 0.5 0.5 ]
|
|
|
|
mean value: 0.4607142857142857
|
|
|
|
key: train_recall
|
|
value: [0.69117647 0.67647059 0.68115942 0.66666667 0.5942029 0.66666667
|
|
0.57352941 0.63235294 0.69117647 0.69117647]
|
|
|
|
mean value: 0.6564578005115089
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.375 0.41071429 0.33928571 0.40178571 0.40178571
|
|
0.53571429 0.40178571 0.53571429 0.46428571]
|
|
|
|
mean value: 0.44910714285714287
|
|
|
|
key: train_roc_auc
|
|
value: [0.74264706 0.72058824 0.68616795 0.70833333 0.68680733 0.70833333
|
|
0.68531543 0.72197357 0.73689258 0.72239983]
|
|
|
|
mean value: 0.7119458653026428
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.23076923 0.30769231 0.23076923 0.25 0.25
|
|
0.36363636 0.25 0.36363636 0.33333333]
|
|
|
|
mean value: 0.297983682983683
|
|
|
|
key: train_jcc
|
|
value: [0.57317073 0.54761905 0.52222222 0.53488372 0.48809524 0.53488372
|
|
0.47560976 0.5308642 0.56626506 0.55294118]
|
|
|
|
mean value: 0.5326554871844268
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.53
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01210165 0.01177716 0.01163483 0.01132727 0.01148891 0.0114069
|
|
0.01160097 0.01141071 0.01152205 0.01083183]
|
|
|
|
mean value: 0.011510229110717774
|
|
|
|
key: score_time
|
|
value: [0.01029778 0.01001024 0.00988793 0.00980735 0.00970197 0.00995755
|
|
0.00985241 0.00983047 0.00946426 0.0092001 ]
|
|
|
|
mean value: 0.009801006317138672
|
|
|
|
key: test_mcc
|
|
value: [0.5 0.37796447 0.46428571 0.09449112 0.32732684 0.47245559
|
|
0.60714286 0.19642857 0.60714286 0.60714286]
|
|
|
|
mean value: 0.42543808750199136
|
|
|
|
key: train_mcc
|
|
value: [0.77005354 0.72129053 0.70837776 0.70934757 0.75395302 0.62076318
|
|
0.74207545 0.67903618 0.69345557 0.7082535 ]
|
|
|
|
mean value: 0.710660629998064
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.6875 0.73333333 0.53333333 0.66666667 0.73333333
|
|
0.8 0.6 0.8 0.8 ]
|
|
|
|
mean value: 0.7104166666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.88235294 0.86029412 0.8540146 0.8540146 0.87591241 0.81021898
|
|
0.86861314 0.83941606 0.84671533 0.8540146 ]
|
|
|
|
mean value: 0.8545566766852727
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.66666667 0.71428571 0.58823529 0.61538462 0.66666667
|
|
0.8 0.625 0.8 0.8 ]
|
|
|
|
mean value: 0.7026238957121309
|
|
|
|
key: train_fscore
|
|
value: [0.875 0.85714286 0.85294118 0.85074627 0.87218045 0.80882353
|
|
0.859375 0.8358209 0.84444444 0.85074627]
|
|
|
|
mean value: 0.8507220891433295
|
|
|
|
key: test_precision
|
|
value: [0.75 0.71428571 0.71428571 0.5 0.66666667 0.8
|
|
0.85714286 0.625 0.85714286 0.85714286]
|
|
|
|
mean value: 0.7341666666666666
|
|
|
|
key: train_precision
|
|
value: [0.93333333 0.87692308 0.86567164 0.87692308 0.90625 0.82089552
|
|
0.91666667 0.84848485 0.85074627 0.86363636]
|
|
|
|
mean value: 0.8759530798803187
|
|
|
|
key: test_recall
|
|
value: [0.75 0.625 0.71428571 0.71428571 0.57142857 0.57142857
|
|
0.75 0.625 0.75 0.75 ]
|
|
|
|
mean value: 0.6821428571428572
|
|
|
|
key: train_recall
|
|
value: [0.82352941 0.83823529 0.84057971 0.82608696 0.84057971 0.79710145
|
|
0.80882353 0.82352941 0.83823529 0.83823529]
|
|
|
|
mean value: 0.8274936061381074
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.6875 0.73214286 0.54464286 0.66071429 0.72321429
|
|
0.80357143 0.59821429 0.80357143 0.80357143]
|
|
|
|
mean value: 0.7107142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.88235294 0.86029412 0.85411338 0.85421995 0.87617221 0.81031543
|
|
0.86817988 0.83930094 0.84665388 0.85390026]
|
|
|
|
mean value: 0.8545502983802217
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.5 0.55555556 0.41666667 0.44444444 0.5
|
|
0.66666667 0.45454545 0.66666667 0.66666667]
|
|
|
|
mean value: 0.5471212121212121
|
|
|
|
key: train_jcc
|
|
value: [0.77777778 0.75 0.74358974 0.74025974 0.77333333 0.67901235
|
|
0.75342466 0.71794872 0.73076923 0.74025974]
|
|
|
|
mean value: 0.7406375287151543
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.58816695 0.64362407 0.58632851 0.5975914 0.89613318 0.74753761
|
|
0.61819625 0.75936055 0.59009218 0.54823971]
|
|
|
|
mean value: 0.6575270414352417
|
|
|
|
key: score_time
|
|
value: [0.012115 0.01224399 0.01202631 0.01206684 0.01198483 0.01192689
|
|
0.01195455 0.01193309 0.01198435 0.01192069]
|
|
|
|
mean value: 0.012015652656555176
|
|
|
|
key: test_mcc
|
|
value: [0.12598816 0.40451992 0.75592895 0.66143783 0.875 0.34247476
|
|
0.6000992 0.32732684 0.64465837 0.56407607]
|
|
|
|
mean value: 0.5301510088184115
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.5625 0.6875 0.86666667 0.8 0.93333333 0.66666667
|
|
0.8 0.66666667 0.8 0.73333333]
|
|
|
|
mean value: 0.7516666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.58823529 0.61538462 0.83333333 0.82352941 0.93333333 0.54545455
|
|
0.82352941 0.70588235 0.84210526 0.66666667]
|
|
|
|
mean value: 0.7377454227918624
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.55555556 0.8 1. 0.7 0.875 0.75
|
|
0.77777778 0.66666667 0.72727273 1. ]
|
|
|
|
mean value: 0.7852272727272728
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.625 0.5 0.71428571 1. 1. 0.42857143
|
|
0.875 0.75 1. 0.5 ]
|
|
|
|
mean value: 0.7392857142857143
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5625 0.6875 0.85714286 0.8125 0.9375 0.65178571
|
|
0.79464286 0.66071429 0.78571429 0.75 ]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.41666667 0.44444444 0.71428571 0.7 0.875 0.375
|
|
0.7 0.54545455 0.72727273 0.5 ]
|
|
|
|
mean value: 0.5998124098124098
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01483727 0.01472807 0.01215315 0.0125885 0.01221848 0.01251054
|
|
0.01217604 0.0122056 0.01364803 0.01133299]
|
|
|
|
mean value: 0.012839865684509278
|
|
|
|
key: score_time
|
|
value: [0.0116291 0.0092876 0.00965881 0.00963688 0.00916028 0.0092237
|
|
0.00999761 0.00896072 0.01119876 0.00884461]
|
|
|
|
mean value: 0.009759807586669922
|
|
|
|
key: test_mcc
|
|
value: [0.75 1. 1. 0.32732684 0.75592895 0.87287156
|
|
1. 1. 0.875 1. ]
|
|
|
|
mean value: 0.8581127342316412
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 1. 1. 0.66666667 0.86666667 0.93333333
|
|
1. 1. 0.93333333 1. ]
|
|
|
|
mean value: 0.9275
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.875 1. 1. 0.61538462 0.83333333 0.92307692
|
|
1. 1. 0.93333333 1. ]
|
|
|
|
mean value: 0.9180128205128205
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.875 1. 1. 0.66666667 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9541666666666666
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 1. 1. 0.57142857 0.71428571 0.85714286
|
|
1. 1. 0.875 1. ]
|
|
|
|
mean value: 0.8892857142857142
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 1. 1. 0.66071429 0.85714286 0.92857143
|
|
1. 1. 0.9375 1. ]
|
|
|
|
mean value: 0.9258928571428572
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.77777778 1. 1. 0.44444444 0.71428571 0.85714286
|
|
1. 1. 0.875 1. ]
|
|
|
|
mean value: 0.8668650793650794
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09064841 0.08952451 0.09027481 0.08903623 0.08798981 0.08831215
|
|
0.08723617 0.08700705 0.08866048 0.09040737]
|
|
|
|
mean value: 0.0889096975326538
|
|
|
|
key: score_time
|
|
value: [0.01793575 0.01817226 0.01708627 0.0171876 0.01869655 0.01704049
|
|
0.01695991 0.01761818 0.01703238 0.01859355]
|
|
|
|
mean value: 0.017632293701171874
|
|
|
|
key: test_mcc
|
|
value: [0.5 0.62994079 0.73214286 0.19642857 0.47245559 0.6000992
|
|
0.6000992 0.47245559 0.75592895 0.6000992 ]
|
|
|
|
mean value: 0.5559649939908601
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.8125 0.86666667 0.6 0.73333333 0.8
|
|
0.8 0.73333333 0.86666667 0.8 ]
|
|
|
|
mean value: 0.77625
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.8 0.85714286 0.57142857 0.66666667 0.76923077
|
|
0.82352941 0.77777778 0.88888889 0.82352941]
|
|
|
|
mean value: 0.7728194354664942
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.85714286 0.85714286 0.57142857 0.8 0.83333333
|
|
0.77777778 0.7 0.8 0.77777778]
|
|
|
|
mean value: 0.7724603174603175
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.85714286 0.57142857 0.57142857 0.71428571
|
|
0.875 0.875 1. 0.875 ]
|
|
|
|
mean value: 0.7839285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.8125 0.86607143 0.59821429 0.72321429 0.79464286
|
|
0.79464286 0.72321429 0.85714286 0.79464286]
|
|
|
|
mean value: 0.7714285714285715
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.66666667 0.75 0.4 0.5 0.625
|
|
0.7 0.63636364 0.8 0.7 ]
|
|
|
|
mean value: 0.6378030303030303
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00881147 0.0088315 0.0086813 0.00854588 0.00864267 0.0085535
|
|
0.00905848 0.00856781 0.00901222 0.0118227 ]
|
|
|
|
mean value: 0.009052753448486328
|
|
|
|
key: score_time
|
|
value: [0.00882411 0.00846767 0.00852108 0.00859189 0.00842762 0.00838542
|
|
0.00842547 0.00838161 0.01362896 0.00881934]
|
|
|
|
mean value: 0.009047317504882812
|
|
|
|
key: test_mcc
|
|
value: [0.37796447 0.51639778 0.20044593 0.60714286 0.20044593 0.53452248
|
|
0.19642857 0.07142857 0.66143783 0.66143783]
|
|
|
|
mean value: 0.40276522547293303
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.75 0.6 0.8 0.6 0.73333333
|
|
0.6 0.53333333 0.8 0.8 ]
|
|
|
|
mean value: 0.6904166666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.71428571 0.4 0.8 0.4 0.6
|
|
0.625 0.53333333 0.76923077 0.76923077]
|
|
|
|
mean value: 0.6277747252747252
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.83333333 0.66666667 0.75 0.66666667 1.
|
|
0.625 0.57142857 1. 1. ]
|
|
|
|
mean value: 0.7827380952380952
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.625 0.625 0.28571429 0.85714286 0.28571429 0.42857143
|
|
0.625 0.5 0.625 0.625 ]
|
|
|
|
mean value: 0.5482142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.75 0.58035714 0.80357143 0.58035714 0.71428571
|
|
0.59821429 0.53571429 0.8125 0.8125 ]
|
|
|
|
mean value: 0.6875
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.55555556 0.25 0.66666667 0.25 0.42857143
|
|
0.45454545 0.36363636 0.625 0.625 ]
|
|
|
|
mean value: 0.47189754689754687
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.14575934 1.1562829 1.13136363 1.12599468 1.13223815 1.13000441
|
|
1.13577652 1.14306259 1.21870971 1.1481657 ]
|
|
|
|
mean value: 1.1467357635498048
|
|
|
|
key: score_time
|
|
value: [0.08762646 0.08738136 0.08826542 0.08809686 0.09468246 0.08748865
|
|
0.08778977 0.09589839 0.09322262 0.0872426 ]
|
|
|
|
mean value: 0.08976945877075196
|
|
|
|
key: test_mcc
|
|
value: [0.5 0.62994079 0.73214286 0.32732684 0.47245559 0.6000992
|
|
0.87287156 0.46428571 0.60714286 1. ]
|
|
|
|
mean value: 0.6206265402628611
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.8125 0.86666667 0.66666667 0.73333333 0.8
|
|
0.93333333 0.73333333 0.8 1. ]
|
|
|
|
mean value: 0.8095833333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.8 0.85714286 0.61538462 0.66666667 0.76923077
|
|
0.94117647 0.75 0.8 1. ]
|
|
|
|
mean value: 0.7949601379013144
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.85714286 0.85714286 0.66666667 0.8 0.83333333
|
|
0.88888889 0.75 0.85714286 1. ]
|
|
|
|
mean value: 0.826031746031746
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.85714286 0.57142857 0.57142857 0.71428571
|
|
1. 0.75 0.75 1. ]
|
|
|
|
mean value: 0.7714285714285715
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.8125 0.86607143 0.66071429 0.72321429 0.79464286
|
|
0.92857143 0.73214286 0.80357143 1. ]
|
|
|
|
mean value: 0.8071428571428572
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.6 0.66666667 0.75 0.44444444 0.5 0.625
|
|
0.88888889 0.6 0.66666667 1. ]
|
|
|
|
mean value: 0.6741666666666667
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.91558361 0.93423796 0.81781912 0.90684462 0.8764143 0.91330528
|
|
0.91923761 0.87901211 0.94927478 0.85586262]
|
|
|
|
mean value: 0.8967592000961304
|
|
|
|
key: score_time
|
|
value: [0.18984222 0.22017837 0.20706201 0.2250824 0.25402594 0.19320369
|
|
0.23845482 0.22061253 0.19895887 0.19759345]
|
|
|
|
mean value: 0.21450142860412597
|
|
|
|
key: test_mcc
|
|
value: [0.37796447 0.62994079 0.87287156 0.33928571 0.6000992 0.47245559
|
|
0.87287156 0.46428571 0.49099025 0.73214286]
|
|
|
|
mean value: 0.585290771140166
|
|
|
|
key: train_mcc
|
|
value: [0.92657079 0.94117647 0.92710997 0.92710997 0.94160273 0.91281179
|
|
0.92787101 0.91277477 0.92709446 0.94201665]
|
|
|
|
mean value: 0.9286138604809204
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.8125 0.93333333 0.66666667 0.8 0.73333333
|
|
0.93333333 0.73333333 0.73333333 0.86666667]
|
|
|
|
mean value: 0.79
|
|
|
|
key: train_accuracy
|
|
value: [0.96323529 0.97058824 0.96350365 0.96350365 0.97080292 0.95620438
|
|
0.96350365 0.95620438 0.96350365 0.97080292]
|
|
|
|
mean value: 0.9641852726492057
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.8 0.92307692 0.66666667 0.76923077 0.66666667
|
|
0.94117647 0.75 0.71428571 0.875 ]
|
|
|
|
mean value: 0.7811985563456152
|
|
|
|
key: train_fscore
|
|
value: [0.96296296 0.97058824 0.96350365 0.96350365 0.97101449 0.95588235
|
|
0.96240602 0.95522388 0.96296296 0.97101449]
|
|
|
|
mean value: 0.9639062694573148
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.85714286 1. 0.625 0.83333333 0.8
|
|
0.88888889 0.75 0.83333333 0.875 ]
|
|
|
|
mean value: 0.812936507936508
|
|
|
|
key: train_precision
|
|
value: [0.97014925 0.97058824 0.97058824 0.97058824 0.97101449 0.97014925
|
|
0.98461538 0.96969697 0.97014925 0.95714286]
|
|
|
|
mean value: 0.9704682171285217
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.85714286 0.71428571 0.71428571 0.57142857
|
|
1. 0.75 0.625 0.875 ]
|
|
|
|
mean value: 0.7607142857142857
|
|
|
|
key: train_recall
|
|
value: [0.95588235 0.97058824 0.95652174 0.95652174 0.97101449 0.94202899
|
|
0.94117647 0.94117647 0.95588235 0.98529412]
|
|
|
|
mean value: 0.9576086956521739
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.8125 0.92857143 0.66964286 0.79464286 0.72321429
|
|
0.92857143 0.73214286 0.74107143 0.86607143]
|
|
|
|
mean value: 0.7883928571428571
|
|
|
|
key: train_roc_auc
|
|
value: [0.96323529 0.97058824 0.96355499 0.96355499 0.97080136 0.95630861
|
|
0.96334186 0.95609548 0.96344842 0.97090793]
|
|
|
|
mean value: 0.9641837169650469
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.66666667 0.85714286 0.5 0.625 0.5
|
|
0.88888889 0.6 0.55555556 0.77777778]
|
|
|
|
mean value: 0.6516486291486291
|
|
|
|
key: train_jcc
|
|
value: [0.92857143 0.94285714 0.92957746 0.92957746 0.94366197 0.91549296
|
|
0.92753623 0.91428571 0.92857143 0.94366197]
|
|
|
|
mean value: 0.9303793777155688
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01198173 0.01064229 0.00866985 0.00969267 0.00863671 0.01164412
|
|
0.00910163 0.00864434 0.00873184 0.00863051]
|
|
|
|
mean value: 0.009637570381164551
|
|
|
|
key: score_time
|
|
value: [0.01324654 0.00842619 0.00931144 0.00852728 0.00858665 0.01137972
|
|
0.00852919 0.00900602 0.00848174 0.00837255]
|
|
|
|
mean value: 0.009386730194091798
|
|
|
|
key: test_mcc
|
|
value: [ 0.12598816 0.25819889 -0.26189246 -0.04029115 0.20044593 0.04029115
|
|
0.09449112 0.37796447 -0.02620712 -0.02620712]
|
|
|
|
mean value: 0.07427818649684113
|
|
|
|
key: train_mcc
|
|
value: [0.37721677 0.34459104 0.49711106 0.44876452 0.42884415 0.35028846
|
|
0.33060837 0.4337651 0.45582775 0.35292142]
|
|
|
|
mean value: 0.4019938637561584
|
|
|
|
key: test_accuracy
|
|
value: [0.5625 0.625 0.4 0.46666667 0.6 0.53333333
|
|
0.53333333 0.66666667 0.46666667 0.46666667]
|
|
|
|
mean value: 0.5320833333333334
|
|
|
|
key: train_accuracy
|
|
value: [0.67647059 0.66911765 0.73722628 0.72262774 0.7080292 0.67153285
|
|
0.64963504 0.7080292 0.71532847 0.67153285]
|
|
|
|
mean value: 0.6929529841133534
|
|
|
|
key: test_fscore
|
|
value: [0.53333333 0.57142857 0.18181818 0.55555556 0.4 0.36363636
|
|
0.46153846 0.61538462 0.2 0.2 ]
|
|
|
|
mean value: 0.4082695082695083
|
|
|
|
key: train_fscore
|
|
value: [0.60714286 0.63414634 0.69491525 0.70769231 0.67213115 0.64
|
|
0.54716981 0.65517241 0.65486726 0.62184874]
|
|
|
|
mean value: 0.6435086129323676
|
|
|
|
key: test_precision
|
|
value: [0.57142857 0.66666667 0.25 0.45454545 0.66666667 0.5
|
|
0.6 0.8 0.5 0.5 ]
|
|
|
|
mean value: 0.5509307359307359
|
|
|
|
key: train_precision
|
|
value: [0.77272727 0.70909091 0.83673469 0.75409836 0.77358491 0.71428571
|
|
0.76315789 0.79166667 0.82222222 0.7254902 ]
|
|
|
|
mean value: 0.7663058836001725
|
|
|
|
key: test_recall
|
|
value: [0.5 0.5 0.14285714 0.71428571 0.28571429 0.28571429
|
|
0.375 0.5 0.125 0.125 ]
|
|
|
|
mean value: 0.35535714285714287
|
|
|
|
key: train_recall
|
|
value: [0.5 0.57352941 0.5942029 0.66666667 0.5942029 0.57971014
|
|
0.42647059 0.55882353 0.54411765 0.54411765]
|
|
|
|
mean value: 0.5581841432225064
|
|
|
|
key: test_roc_auc
|
|
value: [0.5625 0.625 0.38392857 0.48214286 0.58035714 0.51785714
|
|
0.54464286 0.67857143 0.49107143 0.49107143]
|
|
|
|
mean value: 0.5357142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.67647059 0.66911765 0.73827792 0.72303922 0.70886616 0.67220801
|
|
0.6480179 0.706948 0.71408781 0.67060955]
|
|
|
|
mean value: 0.6927642796248934
|
|
|
|
key: test_jcc
|
|
value: [0.36363636 0.4 0.1 0.38461538 0.25 0.22222222
|
|
0.3 0.44444444 0.11111111 0.11111111]
|
|
|
|
mean value: 0.2687140637140637
|
|
|
|
key: train_jcc
|
|
value: [0.43589744 0.46428571 0.53246753 0.54761905 0.50617284 0.47058824
|
|
0.37662338 0.48717949 0.48684211 0.45121951]
|
|
|
|
mean value: 0.47588952863311645
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.08329582 0.08896995 0.04954171 0.04974389 0.06016922 0.05285072
|
|
0.05686045 0.05500674 0.05559015 0.05193424]
|
|
|
|
mean value: 0.06039628982543945
|
|
|
|
key: score_time
|
|
value: [0.01032519 0.0116601 0.01022577 0.010571 0.01124644 0.01116228
|
|
0.01172423 0.01144004 0.01061893 0.01088452]
|
|
|
|
mean value: 0.010985851287841797
|
|
|
|
key: test_mcc
|
|
value: [0.75 1. 0.87287156 0.60714286 0.6000992 1.
|
|
0.87287156 0.875 1. 1. ]
|
|
|
|
mean value: 0.8577985177179775
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 1. 0.93333333 0.8 0.8 1.
|
|
0.93333333 0.93333333 1. 1. ]
|
|
|
|
mean value: 0.9275
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.875 1. 0.92307692 0.8 0.76923077 1.
|
|
0.94117647 0.93333333 1. 1. ]
|
|
|
|
mean value: 0.9241817496229261
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.875 1. 1. 0.75 0.83333333 1.
|
|
0.88888889 1. 1. 1. ]
|
|
|
|
mean value: 0.9347222222222222
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 1. 0.85714286 0.85714286 0.71428571 1.
|
|
1. 0.875 1. 1. ]
|
|
|
|
mean value: 0.9178571428571428
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 1. 0.92857143 0.80357143 0.79464286 1.
|
|
0.92857143 0.9375 1. 1. ]
|
|
|
|
mean value: 0.9267857142857143
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.77777778 1. 0.85714286 0.66666667 0.625 1.
|
|
0.88888889 0.875 1. 1. ]
|
|
|
|
mean value: 0.8690476190476191
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.83
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02963209 0.05196762 0.04611087 0.04891753 0.04869604 0.06951809
|
|
0.04088116 0.04300809 0.02362967 0.05074644]
|
|
|
|
mean value: 0.04531075954437256
|
|
|
|
key: score_time
|
|
value: [0.01993132 0.02110863 0.0204258 0.01733303 0.02020669 0.03094959
|
|
0.02189136 0.01178145 0.02161932 0.02005577]
|
|
|
|
mean value: 0.020530295372009278
|
|
|
|
key: test_mcc
|
|
value: [0.25819889 0.28867513 0.34247476 0.6000992 0.47245559 0.6000992
|
|
0.46770717 0.66143783 0.21821789 0.21821789]
|
|
|
|
mean value: 0.4127583553197128
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.625 0.66666667 0.8 0.73333333 0.8
|
|
0.66666667 0.8 0.6 0.6 ]
|
|
|
|
mean value: 0.6916666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.5 0.54545455 0.76923077 0.66666667 0.76923077
|
|
0.54545455 0.76923077 0.57142857 0.57142857]
|
|
|
|
mean value: 0.6279553779553779
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.75 0.75 0.83333333 0.8 0.83333333
|
|
1. 1. 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7966666666666666
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.5 0.375 0.42857143 0.71428571 0.57142857 0.71428571
|
|
0.375 0.625 0.5 0.5 ]
|
|
|
|
mean value: 0.5303571428571429
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.625 0.65178571 0.79464286 0.72321429 0.79464286
|
|
0.6875 0.8125 0.60714286 0.60714286]
|
|
|
|
mean value: 0.6928571428571428
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.33333333 0.375 0.625 0.5 0.625
|
|
0.375 0.625 0.4 0.4 ]
|
|
|
|
mean value: 0.4658333333333333
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01186013 0.00880361 0.00883102 0.00854611 0.00853038 0.00845814
|
|
0.00847054 0.00839543 0.00853038 0.00842953]
|
|
|
|
mean value: 0.008885526657104492
|
|
|
|
key: score_time
|
|
value: [0.00882626 0.00880146 0.00849986 0.00830603 0.00833178 0.00832748
|
|
0.00834227 0.00829768 0.00834322 0.00830102]
|
|
|
|
mean value: 0.008437705039978028
|
|
|
|
key: test_mcc
|
|
value: [0.25819889 0.25819889 0.33928571 0.13363062 0.21821789 0.32732684
|
|
0.46428571 0.18898224 0.49099025 0.33928571]
|
|
|
|
mean value: 0.30184027584332546
|
|
|
|
key: train_mcc
|
|
value: [0.44602578 0.44602578 0.47900717 0.4540104 0.52059257 0.41608027
|
|
0.49197671 0.38537806 0.43208129 0.4754592 ]
|
|
|
|
mean value: 0.4546637228298039
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.625 0.66666667 0.53333333 0.6 0.66666667
|
|
0.73333333 0.6 0.73333333 0.66666667]
|
|
|
|
mean value: 0.645
|
|
|
|
key: train_accuracy
|
|
value: [0.72058824 0.72058824 0.73722628 0.72262774 0.75912409 0.7080292
|
|
0.74452555 0.68613139 0.71532847 0.73722628]
|
|
|
|
mean value: 0.7251395448690425
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.66666667 0.66666667 0.63157895 0.625 0.61538462
|
|
0.75 0.66666667 0.71428571 0.66666667]
|
|
|
|
mean value: 0.6669582610372083
|
|
|
|
key: train_fscore
|
|
value: [0.73972603 0.73972603 0.75675676 0.75 0.77241379 0.71428571
|
|
0.75524476 0.71895425 0.72340426 0.74285714]
|
|
|
|
mean value: 0.7413368720727499
|
|
|
|
key: test_precision
|
|
value: [0.6 0.6 0.625 0.5 0.55555556 0.66666667
|
|
0.75 0.6 0.83333333 0.71428571]
|
|
|
|
mean value: 0.6444841269841269
|
|
|
|
key: train_precision
|
|
value: [0.69230769 0.69230769 0.70886076 0.68674699 0.73684211 0.70422535
|
|
0.72 0.64705882 0.69863014 0.72222222]
|
|
|
|
mean value: 0.7009201772174631
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.71428571 0.85714286 0.71428571 0.57142857
|
|
0.75 0.75 0.625 0.625 ]
|
|
|
|
mean value: 0.7107142857142857
|
|
|
|
key: train_recall
|
|
value: [0.79411765 0.79411765 0.8115942 0.82608696 0.8115942 0.72463768
|
|
0.79411765 0.80882353 0.75 0.76470588]
|
|
|
|
mean value: 0.7879795396419438
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.625 0.66964286 0.55357143 0.60714286 0.66071429
|
|
0.73214286 0.58928571 0.74107143 0.66964286]
|
|
|
|
mean value: 0.6473214285714286
|
|
|
|
key: train_roc_auc
|
|
value: [0.72058824 0.72058824 0.73667945 0.72186701 0.75873828 0.70790708
|
|
0.74488491 0.68702046 0.71557971 0.7374254 ]
|
|
|
|
mean value: 0.7251278772378517
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.5 0.5 0.46153846 0.45454545 0.44444444
|
|
0.6 0.5 0.55555556 0.5 ]
|
|
|
|
mean value: 0.5016083916083917
|
|
|
|
key: train_jcc
|
|
value: [0.58695652 0.58695652 0.60869565 0.6 0.62921348 0.55555556
|
|
0.60674157 0.56122449 0.56666667 0.59090909]
|
|
|
|
mean value: 0.5892919554759181
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01088572 0.01611996 0.01455903 0.01455474 0.01532435 0.01476288
|
|
0.0145545 0.01435685 0.01525736 0.01636076]
|
|
|
|
mean value: 0.014673614501953125
|
|
|
|
key: score_time
|
|
value: [0.00849771 0.01114488 0.01114917 0.01125813 0.01136613 0.01144791
|
|
0.01134324 0.01134682 0.01133847 0.01127172]
|
|
|
|
mean value: 0.011016416549682616
|
|
|
|
key: test_mcc
|
|
value: [0.5 0.57735027 0.73214286 0.60714286 0.47245559 0.47245559
|
|
0.87287156 0.37796447 0.64465837 0.66143783]
|
|
|
|
mean value: 0.5918479398938057
|
|
|
|
key: train_mcc
|
|
value: [0.94280904 0.94280904 0.9158731 0.92951942 0.90259957 0.87631485
|
|
0.97120941 0.82498207 0.98550725 0.98550418]
|
|
|
|
mean value: 0.927712793196434
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.75 0.86666667 0.8 0.73333333 0.73333333
|
|
0.93333333 0.66666667 0.8 0.8 ]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.97058824 0.97058824 0.95620438 0.96350365 0.94890511 0.93430657
|
|
0.98540146 0.90510949 0.99270073 0.99270073]
|
|
|
|
mean value: 0.9620008587376556
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.66666667 0.85714286 0.8 0.66666667 0.66666667
|
|
0.94117647 0.61538462 0.84210526 0.76923077]
|
|
|
|
mean value: 0.7575039975504372
|
|
|
|
key: train_fscore
|
|
value: [0.96969697 0.96969697 0.95833333 0.96240602 0.94656489 0.93023256
|
|
0.98507463 0.89430894 0.99270073 0.99259259]
|
|
|
|
mean value: 0.9601607623875288
|
|
|
|
key: test_precision
|
|
value: [0.75 1. 0.85714286 0.75 0.8 0.8
|
|
0.88888889 0.8 0.72727273 1. ]
|
|
|
|
mean value: 0.8373304473304474
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.92 1. 1. 1.
|
|
1. 1. 0.98550725 1. ]
|
|
|
|
mean value: 0.9905507246376811
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.85714286 0.85714286 0.57142857 0.57142857
|
|
1. 0.5 1. 0.625 ]
|
|
|
|
mean value: 0.7232142857142857
|
|
|
|
key: train_recall
|
|
value: [0.94117647 0.94117647 1. 0.92753623 0.89855072 0.86956522
|
|
0.97058824 0.80882353 1. 0.98529412]
|
|
|
|
mean value: 0.9342710997442455
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.75 0.86607143 0.80357143 0.72321429 0.72321429
|
|
0.92857143 0.67857143 0.78571429 0.8125 ]
|
|
|
|
mean value: 0.7821428571428571
|
|
|
|
key: train_roc_auc
|
|
value: [0.97058824 0.97058824 0.95588235 0.96376812 0.94927536 0.93478261
|
|
0.98529412 0.90441176 0.99275362 0.99264706]
|
|
|
|
mean value: 0.961999147485081
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.5 0.75 0.66666667 0.5 0.5
|
|
0.88888889 0.44444444 0.72727273 0.625 ]
|
|
|
|
mean value: 0.6202272727272727
|
|
|
|
key: train_jcc
|
|
value: [0.94117647 0.94117647 0.92 0.92753623 0.89855072 0.86956522
|
|
0.97058824 0.80882353 0.98550725 0.98529412]
|
|
|
|
mean value: 0.9248218243819267
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0133729 0.01307631 0.01310182 0.01377559 0.01355219 0.01383233
|
|
0.01379371 0.0128088 0.01407123 0.01299 ]
|
|
|
|
mean value: 0.013437485694885254
|
|
|
|
key: score_time
|
|
value: [0.01168633 0.01124477 0.01129055 0.01130223 0.01146483 0.01131105
|
|
0.01131058 0.01155877 0.01123357 0.01130033]
|
|
|
|
mean value: 0.011370301246643066
|
|
|
|
key: test_mcc
|
|
value: [0.25819889 0.48038446 0.75592895 0.60714286 0.28571429 0.47245559
|
|
0.76376262 0.46770717 0.73214286 0.36689969]
|
|
|
|
mean value: 0.5190337370467798
|
|
|
|
key: train_mcc
|
|
value: [0.21483446 0.5547002 0.85060976 0.88938138 0.4690195 0.98550725
|
|
0.73976828 0.64686584 0.97080136 0.52198402]
|
|
|
|
mean value: 0.6843472055199719
|
|
|
|
key: test_accuracy
|
|
value: [0.5625 0.6875 0.86666667 0.8 0.6 0.73333333
|
|
0.86666667 0.66666667 0.86666667 0.6 ]
|
|
|
|
mean value: 0.725
|
|
|
|
key: train_accuracy
|
|
value: [0.54411765 0.73529412 0.91970803 0.94160584 0.67883212 0.99270073
|
|
0.8540146 0.79562044 0.98540146 0.71532847]
|
|
|
|
mean value: 0.8162623443538
|
|
|
|
key: test_fscore
|
|
value: [0.69565217 0.54545455 0.83333333 0.8 0.25 0.66666667
|
|
0.85714286 0.54545455 0.875 0.4 ]
|
|
|
|
mean value: 0.6468704121964991
|
|
|
|
key: train_fscore
|
|
value: [0.68686869 0.64 0.91338583 0.93846154 0.53191489 0.99270073
|
|
0.82758621 0.74074074 0.98529412 0.59793814]
|
|
|
|
mean value: 0.7854890885260156
|
|
|
|
key: test_precision
|
|
value: [0.53333333 1. 1. 0.75 1. 0.8
|
|
1. 1. 0.875 1. ]
|
|
|
|
mean value: 0.8958333333333334
|
|
|
|
key: train_precision
|
|
value: [0.52307692 1. 1. 1. 1. 1.
|
|
1. 1. 0.98529412 1. ]
|
|
|
|
mean value: 0.9508371040723982
|
|
|
|
key: test_recall
|
|
value: [1. 0.375 0.71428571 0.85714286 0.14285714 0.57142857
|
|
0.75 0.375 0.875 0.25 ]
|
|
|
|
mean value: 0.5910714285714286
|
|
|
|
key: train_recall
|
|
value: [1. 0.47058824 0.84057971 0.88405797 0.36231884 0.98550725
|
|
0.70588235 0.58823529 0.98529412 0.42647059]
|
|
|
|
mean value: 0.7248934356351237
|
|
|
|
key: test_roc_auc
|
|
value: [0.5625 0.6875 0.85714286 0.80357143 0.57142857 0.72321429
|
|
0.875 0.6875 0.86607143 0.625 ]
|
|
|
|
mean value: 0.7258928571428571
|
|
|
|
key: train_roc_auc
|
|
value: [0.54411765 0.73529412 0.92028986 0.94202899 0.68115942 0.99275362
|
|
0.85294118 0.79411765 0.98540068 0.71323529]
|
|
|
|
mean value: 0.8161338448422848
|
|
|
|
key: test_jcc
|
|
value: [0.53333333 0.375 0.71428571 0.66666667 0.14285714 0.5
|
|
0.75 0.375 0.77777778 0.25 ]
|
|
|
|
mean value: 0.5084920634920634
|
|
|
|
key: train_jcc
|
|
value: [0.52307692 0.47058824 0.84057971 0.88405797 0.36231884 0.98550725
|
|
0.70588235 0.58823529 0.97101449 0.42647059]
|
|
|
|
mean value: 0.6757731654534723
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.1091814 0.09360719 0.09445 0.09590602 0.09871435 0.09428263
|
|
0.0952394 0.09724092 0.09747028 0.09545755]
|
|
|
|
mean value: 0.09715497493743896
|
|
|
|
key: score_time
|
|
value: [0.01468515 0.01477242 0.0149014 0.01512694 0.01514816 0.01505828
|
|
0.01533699 0.01503086 0.0151031 0.01507807]
|
|
|
|
mean value: 0.015024137496948243
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 1. 0.87287156 0.76376262 0.73214286 0.64465837
|
|
0.87287156 1. 1. 1. ]
|
|
|
|
mean value: 0.8516247754425785
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 1. 0.93333333 0.86666667 0.86666667 0.8
|
|
0.93333333 1. 1. 1. ]
|
|
|
|
mean value: 0.92125
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 1. 0.92307692 0.875 0.85714286 0.72727273
|
|
0.94117647 1. 1. 1. ]
|
|
|
|
mean value: 0.9123668978080742
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85714286 1. 1. 0.77777778 0.85714286 1.
|
|
0.88888889 1. 1. 1. ]
|
|
|
|
mean value: 0.9380952380952381
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 0.85714286 1. 0.85714286 0.57142857
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9035714285714286
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 1. 0.92857143 0.875 0.86607143 0.78571429
|
|
0.92857143 1. 1. 1. ]
|
|
|
|
mean value: 0.9196428571428572
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 1. 0.85714286 0.77777778 0.75 0.57142857
|
|
0.88888889 1. 1. 1. ]
|
|
|
|
mean value: 0.8511904761904762
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.76
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03675842 0.03842211 0.05185056 0.05657411 0.04770231 0.03377151
|
|
0.04809737 0.06028175 0.04753661 0.03637218]
|
|
|
|
mean value: 0.0457366943359375
|
|
|
|
key: score_time
|
|
value: [0.02331495 0.03389597 0.03279161 0.03452301 0.01713157 0.02505445
|
|
0.02235079 0.03766513 0.02260923 0.01936841]
|
|
|
|
mean value: 0.02687051296234131
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 1. 1. 0.32732684 0.6000992 0.75592895
|
|
0.87287156 0.875 0.875 1. ]
|
|
|
|
mean value: 0.7936167328814103
|
|
|
|
key: train_mcc
|
|
value: [0.97100831 1. 0.98550725 0.98550725 0.98550725 0.98550725
|
|
0.98550418 1. 0.98550418 1. ]
|
|
|
|
mean value: 0.9884045656539326
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 1. 1. 0.66666667 0.8 0.86666667
|
|
0.93333333 0.93333333 0.93333333 1. ]
|
|
|
|
mean value: 0.8945833333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.98529412 1. 0.99270073 0.99270073 0.99270073 0.99270073
|
|
0.99270073 1. 0.99270073 1. ]
|
|
|
|
mean value: 0.9941498497209103
|
|
|
|
key: test_fscore
|
|
value: [0.8 1. 1. 0.61538462 0.76923077 0.83333333
|
|
0.94117647 0.93333333 0.93333333 1. ]
|
|
|
|
mean value: 0.882579185520362
|
|
|
|
key: train_fscore
|
|
value: [0.98507463 1. 0.99270073 0.99270073 0.99270073 0.99270073
|
|
0.99259259 1. 0.99259259 1. ]
|
|
|
|
mean value: 0.9941062731758886
|
|
|
|
key: test_precision
|
|
value: [0.85714286 1. 1. 0.66666667 0.83333333 1.
|
|
0.88888889 1. 1. 1. ]
|
|
|
|
mean value: 0.9246031746031746
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 1. 0.57142857 0.71428571 0.71428571
|
|
1. 0.875 0.875 1. ]
|
|
|
|
mean value: 0.85
|
|
|
|
key: train_recall
|
|
value: [0.97058824 1. 0.98550725 0.98550725 0.98550725 0.98550725
|
|
0.98529412 1. 0.98529412 1. ]
|
|
|
|
mean value: 0.9883205456095482
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 1. 1. 0.66071429 0.79464286 0.85714286
|
|
0.92857143 0.9375 0.9375 1. ]
|
|
|
|
mean value: 0.8928571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [0.98529412 1. 0.99275362 0.99275362 0.99275362 0.99275362
|
|
0.99264706 1. 0.99264706 1. ]
|
|
|
|
mean value: 0.994160272804774
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 1. 1. 0.44444444 0.625 0.71428571
|
|
0.88888889 0.875 0.875 1. ]
|
|
|
|
mean value: 0.8089285714285714
|
|
|
|
key: train_jcc
|
|
value: [0.97058824 1. 0.98550725 0.98550725 0.98550725 0.98550725
|
|
0.98529412 1. 0.98529412 1. ]
|
|
|
|
mean value: 0.9883205456095482
|
|
|
|
MCC on Blind test: 0.67
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03348088 0.05689144 0.05448914 0.05212975 0.05107021 0.06284046
|
|
0.05529141 0.06141233 0.0621357 0.04672551]
|
|
|
|
mean value: 0.05364668369293213
|
|
|
|
key: score_time
|
|
value: [0.02260804 0.02222872 0.02446175 0.02454901 0.02493072 0.02065587
|
|
0.02335095 0.02346754 0.02365112 0.02566361]
|
|
|
|
mean value: 0.023556733131408693
|
|
|
|
key: test_mcc
|
|
value: [0.25 0.67419986 0.6000992 0.33928571 0.20044593 0.32732684
|
|
0.49099025 0.07142857 0.19642857 0.76376262]
|
|
|
|
mean value: 0.3913967553400341
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.8125 0.8 0.66666667 0.6 0.66666667
|
|
0.73333333 0.53333333 0.6 0.86666667]
|
|
|
|
mean value: 0.6904166666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.76923077 0.76923077 0.66666667 0.4 0.61538462
|
|
0.71428571 0.53333333 0.625 0.85714286]
|
|
|
|
mean value: 0.6575274725274725
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.625 1. 0.83333333 0.625 0.66666667 0.66666667
|
|
0.83333333 0.57142857 0.625 1. ]
|
|
|
|
mean value: 0.7446428571428572
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.625 0.625 0.71428571 0.71428571 0.28571429 0.57142857
|
|
0.625 0.5 0.625 0.75 ]
|
|
|
|
mean value: 0.6035714285714285
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.8125 0.79464286 0.66964286 0.58035714 0.66071429
|
|
0.74107143 0.53571429 0.59821429 0.875 ]
|
|
|
|
mean value: 0.6892857142857143
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.625 0.625 0.5 0.25 0.44444444
|
|
0.55555556 0.36363636 0.45454545 0.75 ]
|
|
|
|
mean value: 0.5022727272727273
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.29112267 0.26599479 0.26156926 0.25661635 0.258183 0.25448561
|
|
0.25596333 0.26583529 0.25837755 0.25584912]
|
|
|
|
mean value: 0.262399697303772
|
|
|
|
key: score_time
|
|
value: [0.0099287 0.00933981 0.00979567 0.00901675 0.00962639 0.00900555
|
|
0.00895739 0.00909352 0.00914288 0.00907183]
|
|
|
|
mean value: 0.009297847747802734
|
|
|
|
key: test_mcc
|
|
value: [0.75 1. 1. 0.46428571 0.6000992 0.87287156
|
|
0.87287156 1. 0.875 1. ]
|
|
|
|
mean value: 0.8435128034322632
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 1. 1. 0.73333333 0.8 0.93333333
|
|
0.93333333 1. 0.93333333 1. ]
|
|
|
|
mean value: 0.9208333333333334
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.875 1. 1. 0.71428571 0.76923077 0.92307692
|
|
0.94117647 1. 0.93333333 1. ]
|
|
|
|
mean value: 0.9156103210514975
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.875 1. 1. 0.71428571 0.83333333 1.
|
|
0.88888889 1. 1. 1. ]
|
|
|
|
mean value: 0.9311507936507937
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 1. 1. 0.71428571 0.71428571 0.85714286
|
|
1. 1. 0.875 1. ]
|
|
|
|
mean value: 0.9035714285714286
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 1. 1. 0.73214286 0.79464286 0.92857143
|
|
0.92857143 1. 0.9375 1. ]
|
|
|
|
mean value: 0.9196428571428572
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.77777778 1. 1. 0.55555556 0.625 0.85714286
|
|
0.88888889 1. 0.875 1. ]
|
|
|
|
mean value: 0.8579365079365079
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.83
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01512146 0.01786995 0.01726413 0.01764679 0.01750636 0.01636434
|
|
0.01493645 0.01331592 0.01431489 0.0160501 ]
|
|
|
|
mean value: 0.016039037704467775
|
|
|
|
key: score_time
|
|
value: [0.01191425 0.01210332 0.01256967 0.01204371 0.01179075 0.01194525
|
|
0.00977325 0.01127338 0.01174974 0.01538849]
|
|
|
|
mean value: 0.012055182456970214
|
|
|
|
key: test_mcc
|
|
value: [0.37796447 0.67419986 0.66143783 0.66143783 0.875 0.76376262
|
|
0.64465837 0.64465837 0.64465837 0.75592895]
|
|
|
|
mean value: 0.6703706666510105
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.8125 0.8 0.8 0.93333333 0.86666667
|
|
0.8 0.8 0.8 0.86666667]
|
|
|
|
mean value: 0.8104166666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.84210526 0.82352941 0.82352941 0.93333333 0.875
|
|
0.84210526 0.84210526 0.84210526 0.88888889]
|
|
|
|
mean value: 0.843997482565594
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.57142857 0.72727273 0.7 0.7 0.875 0.77777778
|
|
0.72727273 0.72727273 0.72727273 0.8 ]
|
|
|
|
mean value: 0.7333297258297258
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.8125 0.8125 0.8125 0.9375 0.875
|
|
0.78571429 0.78571429 0.78571429 0.85714286]
|
|
|
|
mean value: 0.8089285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.72727273 0.7 0.7 0.875 0.77777778
|
|
0.72727273 0.72727273 0.72727273 0.8 ]
|
|
|
|
mean value: 0.7333297258297258
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03330803 0.04024577 0.03240132 0.03274846 0.03767657 0.03365231
|
|
0.03263354 0.03271079 0.03440356 0.03430128]
|
|
|
|
mean value: 0.034408164024353025
|
|
|
|
key: score_time
|
|
value: [0.02091265 0.01603365 0.0221107 0.01166129 0.01157904 0.02239871
|
|
0.02273679 0.02029014 0.01149106 0.01956654]
|
|
|
|
mean value: 0.017878055572509766
|
|
|
|
key: test_mcc
|
|
value: [0.5 0.57735027 0.75592895 0.60714286 0.6000992 0.73214286
|
|
1. 0.60714286 0.87287156 0.76376262]
|
|
|
|
mean value: 0.7016441161555573
|
|
|
|
key: train_mcc
|
|
value: [0.97058824 0.94158382 0.95630861 0.98550418 0.98550725 0.97080136
|
|
0.97080136 0.97120941 0.97120941 0.97080136]
|
|
|
|
mean value: 0.9694315005226638
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.75 0.86666667 0.8 0.8 0.86666667
|
|
1. 0.8 0.93333333 0.86666667]
|
|
|
|
mean value: 0.8433333333333334
|
|
|
|
key: train_accuracy
|
|
value: [0.98529412 0.97058824 0.97810219 0.99270073 0.99270073 0.98540146
|
|
0.98540146 0.98540146 0.98540146 0.98540146]
|
|
|
|
mean value: 0.9846393301846286
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.66666667 0.83333333 0.8 0.76923077 0.85714286
|
|
1. 0.8 0.94117647 0.85714286]
|
|
|
|
mean value: 0.8274692954104719
|
|
|
|
key: train_fscore
|
|
value: [0.98529412 0.97014925 0.97810219 0.99280576 0.99270073 0.98550725
|
|
0.98529412 0.98507463 0.98507463 0.98529412]
|
|
|
|
mean value: 0.9845296781884387
|
|
|
|
key: test_precision
|
|
value: [0.75 1. 1. 0.75 0.83333333 0.85714286
|
|
1. 0.85714286 0.88888889 1. ]
|
|
|
|
mean value: 0.8936507936507936
|
|
|
|
key: train_precision
|
|
value: [0.98529412 0.98484848 0.98529412 0.98571429 1. 0.98550725
|
|
0.98529412 1. 1. 0.98529412]
|
|
|
|
mean value: 0.9897246487527818
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.71428571 0.85714286 0.71428571 0.85714286
|
|
1. 0.75 1. 0.75 ]
|
|
|
|
mean value: 0.7892857142857143
|
|
|
|
key: train_recall
|
|
value: [0.98529412 0.95588235 0.97101449 1. 0.98550725 0.98550725
|
|
0.98529412 0.97058824 0.97058824 0.98529412]
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_7030.py:156: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_7030.py:159: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
|
|
mean value: 0.9794970161977835
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.75 0.85714286 0.80357143 0.79464286 0.86607143
|
|
1. 0.80357143 0.92857143 0.875 ]
|
|
|
|
mean value: 0.8428571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [0.98529412 0.97058824 0.97815431 0.99264706 0.99275362 0.98540068
|
|
0.98540068 0.98529412 0.98529412 0.98540068]
|
|
|
|
mean value: 0.9846227621483377
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.5 0.71428571 0.66666667 0.625 0.75
|
|
1. 0.66666667 0.88888889 0.75 ]
|
|
|
|
mean value: 0.7161507936507936
|
|
|
|
key: train_jcc
|
|
value: [0.97101449 0.94202899 0.95714286 0.98571429 0.98550725 0.97142857
|
|
0.97101449 0.97058824 0.97058824 0.97101449]
|
|
|
|
mean value: 0.9696041895018878
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.19231176 0.09797359 0.18943858 0.19678831 0.19396234 0.20745254
|
|
0.19534802 0.20337844 0.23157358 0.1521194 ]
|
|
|
|
mean value: 0.1860346555709839
|
|
|
|
key: score_time
|
|
value: [0.02210021 0.01170564 0.01970696 0.02110434 0.01916718 0.02101612
|
|
0.02250338 0.02309155 0.02029562 0.01569939]
|
|
|
|
mean value: 0.019639039039611818
|
|
|
|
key: test_mcc
|
|
value: [0.5 0.77459667 0.75592895 0.60714286 0.6000992 0.73214286
|
|
1. 0.60714286 0.60714286 0.76376262]
|
|
|
|
mean value: 0.6947958857806319
|
|
|
|
key: train_mcc
|
|
value: [0.97058824 1. 1. 0.98550418 0.98550725 1.
|
|
0.97080136 0.98550418 1. 0.97080136]
|
|
|
|
mean value: 0.9868706568295526
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.875 0.86666667 0.8 0.8 0.86666667
|
|
1. 0.8 0.8 0.86666667]
|
|
|
|
mean value: 0.8425
|
|
|
|
key: train_accuracy
|
|
value: [0.98529412 1. 1. 0.99270073 0.99270073 1.
|
|
0.98540146 0.99270073 1. 0.98540146]
|
|
|
|
mean value: 0.993419922713611
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.85714286 0.83333333 0.8 0.76923077 0.85714286
|
|
1. 0.8 0.8 0.85714286]
|
|
|
|
mean value: 0.8323992673992674
|
|
|
|
key: train_fscore
|
|
value: [0.98529412 1. 1. 0.99280576 0.99270073 1.
|
|
0.98529412 0.99259259 1. 0.98529412]
|
|
|
|
mean value: 0.993398143085646
|
|
|
|
key: test_precision
|
|
value: [0.75 1. 1. 0.75 0.83333333 0.85714286
|
|
1. 0.85714286 0.85714286 1. ]
|
|
|
|
mean value: 0.8904761904761904
|
|
|
|
key: train_precision
|
|
value: [0.98529412 1. 1. 0.98571429 1. 1.
|
|
0.98529412 1. 1. 0.98529412]
|
|
|
|
mean value: 0.9941596638655462
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.71428571 0.85714286 0.71428571 0.85714286
|
|
1. 0.75 0.75 0.75 ]
|
|
|
|
mean value: 0.7892857142857143
|
|
|
|
key: train_recall
|
|
value: [0.98529412 1. 1. 1. 0.98550725 1.
|
|
0.98529412 0.98529412 1. 0.98529412]
|
|
|
|
mean value: 0.9926683716965047
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.875 0.85714286 0.80357143 0.79464286 0.86607143
|
|
1. 0.80357143 0.80357143 0.875 ]
|
|
|
|
mean value: 0.8428571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [0.98529412 1. 1. 0.99264706 0.99275362 1.
|
|
0.98540068 0.99264706 1. 0.98540068]
|
|
|
|
mean value: 0.9934143222506394
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.75 0.71428571 0.66666667 0.625 0.75
|
|
1. 0.66666667 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7189285714285714
|
|
|
|
key: train_jcc
|
|
value: [0.97101449 1. 1. 0.98571429 0.98550725 1.
|
|
0.97101449 0.98529412 1. 0.97101449]
|
|
|
|
mean value: 0.9869559127999026
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02999878 0.01928473 0.0235436 0.02440357 0.02231836 0.02035689
|
|
0.02418208 0.02052903 0.02584195 0.02433228]
|
|
|
|
mean value: 0.023479127883911134
|
|
|
|
key: score_time
|
|
value: [0.00970721 0.0114491 0.0115521 0.01158023 0.0113039 0.01153755
|
|
0.01155233 0.01156402 0.01155329 0.01153016]
|
|
|
|
mean value: 0.011332988739013672
|
|
|
|
key: test_mcc
|
|
value: [ 0.65465367 0.2 0.65465367 0.5 0.1 0.47809144
|
|
0.55 0.55 -0.1 0.63245553]
|
|
|
|
mean value: 0.4219854317183388
|
|
|
|
key: train_mcc
|
|
value: [0.83356964 0.85714286 0.9047619 0.85811633 0.88463794 0.81173865
|
|
0.83554817 0.78836577 0.83545719 0.8596795 ]
|
|
|
|
mean value: 0.8469017962332058
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.6 0.8 0.7 0.55555556 0.66666667
|
|
0.77777778 0.77777778 0.44444444 0.77777778]
|
|
|
|
mean value: 0.69
|
|
|
|
key: train_accuracy
|
|
value: [0.91666667 0.92857143 0.95238095 0.92857143 0.94117647 0.90588235
|
|
0.91764706 0.89411765 0.91764706 0.92941176]
|
|
|
|
mean value: 0.9232072829131652
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.6 0.83333333 0.76923077 0.5 0.72727273
|
|
0.75 0.8 0.44444444 0.75 ]
|
|
|
|
mean value: 0.6924281274281274
|
|
|
|
key: train_fscore
|
|
value: [0.91764706 0.92857143 0.95238095 0.92682927 0.93975904 0.90697674
|
|
0.91764706 0.89156627 0.91566265 0.92682927]
|
|
|
|
mean value: 0.922386973117808
|
|
|
|
key: test_precision
|
|
value: [1. 0.6 0.71428571 0.625 0.5 0.57142857
|
|
0.75 0.8 0.5 1. ]
|
|
|
|
mean value: 0.7060714285714286
|
|
|
|
key: train_precision
|
|
value: [0.90697674 0.92857143 0.95238095 0.95 0.975 0.90697674
|
|
0.92857143 0.90243902 0.92682927 0.95 ]
|
|
|
|
mean value: 0.9327745590578829
|
|
|
|
key: test_recall
|
|
value: [0.6 0.6 1. 1. 0.5 1. 0.75 0.8 0.4 0.6 ]
|
|
|
|
mean value: 0.725
|
|
|
|
key: train_recall
|
|
value: [0.92857143 0.92857143 0.95238095 0.9047619 0.90697674 0.90697674
|
|
0.90697674 0.88095238 0.9047619 0.9047619 ]
|
|
|
|
mean value: 0.9125692137320044
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.6 0.8 0.7 0.55 0.7 0.775 0.775 0.45 0.8 ]
|
|
|
|
mean value: 0.6950000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [0.91666667 0.92857143 0.95238095 0.92857143 0.94158361 0.90586932
|
|
0.91777409 0.89396456 0.91749723 0.92912514]
|
|
|
|
mean value: 0.9232004429678848
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.42857143 0.71428571 0.625 0.33333333 0.57142857
|
|
0.6 0.66666667 0.28571429 0.6 ]
|
|
|
|
mean value: 0.5425
|
|
|
|
key: train_jcc
|
|
value: [0.84782609 0.86666667 0.90909091 0.86363636 0.88636364 0.82978723
|
|
0.84782609 0.80434783 0.84444444 0.86363636]
|
|
|
|
mean value: 0.8563625617880937
|
|
|
|
MCC on Blind test: 0.53
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.61250806 0.63184762 0.73407745 0.6185782 0.73825121 0.76966834
|
|
0.59189129 0.61164641 0.79721093 0.59707117]
|
|
|
|
mean value: 0.670275068283081
|
|
|
|
key: score_time
|
|
value: [0.01201868 0.01206088 0.01558685 0.01249576 0.01220727 0.01594925
|
|
0.01200271 0.01288962 0.01172495 0.01204181]
|
|
|
|
mean value: 0.012897777557373046
|
|
|
|
key: test_mcc
|
|
value: [ 0.2 0.2 0.65465367 0.81649658 0.31622777 0.15811388
|
|
-0.1 0.63245553 0.1 0.55 ]
|
|
|
|
mean value: 0.3527947432694636
|
|
|
|
key: train_mcc
|
|
value: [1. 0.95346259 1. 1. 1. 1.
|
|
0.46406205 1. 0.48228128 1. ]
|
|
|
|
mean value: 0.8899805920667087
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.6 0.8 0.9 0.66666667 0.55555556
|
|
0.44444444 0.77777778 0.55555556 0.77777778]
|
|
|
|
mean value: 0.6677777777777778
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.97619048 1. 1. 1. 1.
|
|
0.72941176 1. 0.74117647 1. ]
|
|
|
|
mean value: 0.9446778711484594
|
|
|
|
key: test_fscore
|
|
value: [0.6 0.6 0.83333333 0.90909091 0.57142857 0.6
|
|
0.44444444 0.75 0.6 0.8 ]
|
|
|
|
mean value: 0.6708297258297258
|
|
|
|
key: train_fscore
|
|
value: [1. 0.97560976 1. 1. 1. 1.
|
|
0.75268817 1. 0.73809524 1. ]
|
|
|
|
mean value: 0.946639316623581
|
|
|
|
key: test_precision
|
|
value: [0.6 0.6 0.71428571 0.83333333 0.66666667 0.5
|
|
0.4 1. 0.6 0.8 ]
|
|
|
|
mean value: 0.6714285714285715
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.7 1. 0.73809524 1. ]
|
|
|
|
mean value: 0.9438095238095238
|
|
|
|
key: test_recall
|
|
value: [0.6 0.6 1. 1. 0.5 0.75 0.5 0.6 0.6 0.8 ]
|
|
|
|
mean value: 0.695
|
|
|
|
key: train_recall
|
|
value: [1. 0.95238095 1. 1. 1. 1.
|
|
0.81395349 1. 0.73809524 1. ]
|
|
|
|
mean value: 0.9504429678848283
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 0.6 0.8 0.9 0.65 0.575 0.45 0.8 0.55 0.775]
|
|
|
|
mean value: 0.67
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.97619048 1. 1. 1. 1.
|
|
0.72840532 1. 0.74114064 1. ]
|
|
|
|
mean value: 0.9445736434108527
|
|
|
|
key: test_jcc
|
|
value: [0.42857143 0.42857143 0.71428571 0.83333333 0.4 0.42857143
|
|
0.28571429 0.6 0.42857143 0.66666667]
|
|
|
|
mean value: 0.5214285714285715
|
|
|
|
key: train_jcc
|
|
value: [1. 0.95238095 1. 1. 1. 1.
|
|
0.60344828 1. 0.58490566 1. ]
|
|
|
|
mean value: 0.914073488862038
|
|
|
|
MCC on Blind test: 0.49
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01196694 0.01163411 0.00893235 0.00867558 0.00836754 0.00866222
|
|
0.00830102 0.00846601 0.0097506 0.0096159 ]
|
|
|
|
mean value: 0.009437227249145507
|
|
|
|
key: score_time
|
|
value: [0.01155472 0.00901818 0.00884295 0.00876236 0.00837541 0.00872231
|
|
0.00852418 0.00846386 0.00991201 0.00856042]
|
|
|
|
mean value: 0.009073638916015625
|
|
|
|
key: test_mcc
|
|
value: [ 0.65465367 0.2 0.65465367 0.5 0.47809144 0.47809144
|
|
-0.05976143 0.55 0.31622777 0.35 ]
|
|
|
|
mean value: 0.41219565644335876
|
|
|
|
key: train_mcc
|
|
value: [0.4426423 0.4843221 0.48112522 0.52923775 0.52960905 0.41635298
|
|
0.52960905 0.44234856 0.47213452 0.41081572]
|
|
|
|
mean value: 0.4738197262497176
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.6 0.8 0.7 0.66666667 0.66666667
|
|
0.44444444 0.77777778 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6788888888888889
|
|
|
|
key: train_accuracy
|
|
value: [0.70238095 0.72619048 0.73809524 0.76190476 0.75294118 0.69411765
|
|
0.75294118 0.70588235 0.71764706 0.68235294]
|
|
|
|
mean value: 0.7234453781512605
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.6 0.83333333 0.76923077 0.72727273 0.72727273
|
|
0.54545455 0.8 0.72727273 0.66666667]
|
|
|
|
mean value: 0.722983682983683
|
|
|
|
key: train_fscore
|
|
value: [0.75247525 0.76767677 0.75555556 0.77777778 0.78787879 0.74509804
|
|
0.78787879 0.74747475 0.76 0.73786408]
|
|
|
|
mean value: 0.7619679788652766
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.6 0.71428571 0.625 0.57142857 0.57142857
|
|
0.42857143 0.8 0.66666667 0.75 ]
|
|
|
|
mean value: 0.6441666666666667
|
|
|
|
key: train_precision
|
|
value: [0.6440678 0.66666667 0.70833333 0.72916667 0.69642857 0.6440678
|
|
0.69642857 0.64912281 0.65517241 0.62295082]
|
|
|
|
mean value: 0.6712405443226926
|
|
|
|
key: test_recall
|
|
value: [1. 0.6 1. 1. 1. 1. 0.75 0.8 0.8 0.6 ]
|
|
|
|
mean value: 0.855
|
|
|
|
key: train_recall
|
|
value: [0.9047619 0.9047619 0.80952381 0.83333333 0.90697674 0.88372093
|
|
0.90697674 0.88095238 0.9047619 0.9047619 ]
|
|
|
|
mean value: 0.8840531561461794
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.6 0.8 0.7 0.7 0.7 0.475 0.775 0.65 0.675]
|
|
|
|
mean value: 0.6875
|
|
|
|
key: train_roc_auc
|
|
value: [0.70238095 0.72619048 0.73809524 0.76190476 0.75110742 0.69186047
|
|
0.75110742 0.70791805 0.71982281 0.68493909]
|
|
|
|
mean value: 0.7235326688815061
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.42857143 0.71428571 0.625 0.57142857 0.57142857
|
|
0.375 0.66666667 0.57142857 0.5 ]
|
|
|
|
mean value: 0.5738095238095238
|
|
|
|
key: train_jcc
|
|
value: [0.6031746 0.62295082 0.60714286 0.63636364 0.65 0.59375
|
|
0.65 0.59677419 0.61290323 0.58461538]
|
|
|
|
mean value: 0.6157674720323452
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01027441 0.00950837 0.00869203 0.00934887 0.008461 0.00858068
|
|
0.00867271 0.00951266 0.00952578 0.00945115]
|
|
|
|
mean value: 0.00920276641845703
|
|
|
|
key: score_time
|
|
value: [0.0104661 0.00862288 0.00936937 0.00844145 0.00853658 0.0085535
|
|
0.00924015 0.0092988 0.00934792 0.00935721]
|
|
|
|
mean value: 0.009123396873474122
|
|
|
|
key: test_mcc
|
|
value: [ 0.65465367 0. 0.2 -0.40824829 0.1 -0.1
|
|
0.1 -0.63245553 -0.1 0.31622777]
|
|
|
|
mean value: 0.013017761422727625
|
|
|
|
key: train_mcc
|
|
value: [0.40579843 0.40487668 0.47619048 0.42857143 0.39309028 0.45872583
|
|
0.43521595 0.459447 0.48252979 0.50666948]
|
|
|
|
mean value: 0.4451115339233095
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.5 0.6 0.3 0.55555556 0.44444444
|
|
0.55555556 0.22222222 0.44444444 0.55555556]
|
|
|
|
mean value: 0.49777777777777776
|
|
|
|
key: train_accuracy
|
|
value: [0.70238095 0.70238095 0.73809524 0.71428571 0.69411765 0.72941176
|
|
0.71764706 0.72941176 0.74117647 0.75294118]
|
|
|
|
mean value: 0.7221848739495799
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.44444444 0.6 0.36363636 0.5 0.44444444
|
|
0.5 0.36363636 0.44444444 0.33333333]
|
|
|
|
mean value: 0.47439393939393937
|
|
|
|
key: train_fscore
|
|
value: [0.69135802 0.70588235 0.73809524 0.71428571 0.675 0.73563218
|
|
0.72093023 0.71604938 0.73170732 0.74074074]
|
|
|
|
mean value: 0.7169681187009633
|
|
|
|
key: test_precision
|
|
value: [1. 0.5 0.6 0.33333333 0.5 0.4
|
|
0.5 0.33333333 0.5 1. ]
|
|
|
|
mean value: 0.5666666666666667
|
|
|
|
key: train_precision
|
|
value: [0.71794872 0.69767442 0.73809524 0.71428571 0.72972973 0.72727273
|
|
0.72093023 0.74358974 0.75 0.76923077]
|
|
|
|
mean value: 0.7308757291315431
|
|
|
|
key: test_recall
|
|
value: [0.6 0.4 0.6 0.4 0.5 0.5 0.5 0.4 0.4 0.2]
|
|
|
|
mean value: 0.45
|
|
|
|
key: train_recall
|
|
value: [0.66666667 0.71428571 0.73809524 0.71428571 0.62790698 0.74418605
|
|
0.72093023 0.69047619 0.71428571 0.71428571]
|
|
|
|
mean value: 0.7045404208194905
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.5 0.6 0.3 0.55 0.45 0.55 0.2 0.45 0.6 ]
|
|
|
|
mean value: 0.5
|
|
|
|
key: train_roc_auc
|
|
value: [0.70238095 0.70238095 0.73809524 0.71428571 0.69490587 0.72923588
|
|
0.71760797 0.72895903 0.74086379 0.75249169]
|
|
|
|
mean value: 0.7221207087486158
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.28571429 0.42857143 0.22222222 0.33333333 0.28571429
|
|
0.33333333 0.22222222 0.28571429 0.2 ]
|
|
|
|
mean value: 0.3196825396825397
|
|
|
|
key: train_jcc
|
|
value: [0.52830189 0.54545455 0.58490566 0.55555556 0.50943396 0.58181818
|
|
0.56363636 0.55769231 0.57692308 0.58823529]
|
|
|
|
mean value: 0.5591956834631641
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00839567 0.01085949 0.00855041 0.00844908 0.00818205 0.00893188
|
|
0.00921607 0.00920868 0.00871038 0.00835061]
|
|
|
|
mean value: 0.008885431289672851
|
|
|
|
key: score_time
|
|
value: [0.011204 0.01826692 0.01001883 0.00980616 0.0092566 0.00971055
|
|
0.00965047 0.00942969 0.00974894 0.00974965]
|
|
|
|
mean value: 0.01068418025970459
|
|
|
|
key: test_mcc
|
|
value: [ 0. -0.2 0.21821789 0. 0.15811388 0.47809144
|
|
-0.55 0.1 0.35 0.31622777]
|
|
|
|
mean value: 0.08706509829950067
|
|
|
|
key: train_mcc
|
|
value: [0.38490018 0.40487668 0.33485541 0.33371191 0.24681121 0.22326873
|
|
0.31777954 0.31891336 0.34182831 0.41851636]
|
|
|
|
mean value: 0.33254616976361506
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.4 0.6 0.5 0.55555556 0.66666667
|
|
0.22222222 0.55555556 0.66666667 0.55555556]
|
|
|
|
mean value: 0.5222222222222223
|
|
|
|
key: train_accuracy
|
|
value: [0.69047619 0.70238095 0.66666667 0.66666667 0.62352941 0.61176471
|
|
0.65882353 0.65882353 0.67058824 0.70588235]
|
|
|
|
mean value: 0.6655602240896359
|
|
|
|
key: test_fscore
|
|
value: [0.44444444 0.4 0.66666667 0.44444444 0.6 0.72727273
|
|
0.22222222 0.6 0.66666667 0.33333333]
|
|
|
|
mean value: 0.5105050505050505
|
|
|
|
key: train_fscore
|
|
value: [0.71111111 0.70588235 0.68181818 0.6744186 0.63636364 0.62068966
|
|
0.6741573 0.66666667 0.6744186 0.72527473]
|
|
|
|
mean value: 0.6770800842021024
|
|
|
|
key: test_precision
|
|
value: [0.5 0.4 0.57142857 0.5 0.5 0.57142857
|
|
0.2 0.6 0.75 1. ]
|
|
|
|
mean value: 0.5592857142857143
|
|
|
|
key: train_precision
|
|
value: [0.66666667 0.69767442 0.65217391 0.65909091 0.62222222 0.61363636
|
|
0.65217391 0.64444444 0.65909091 0.67346939]
|
|
|
|
mean value: 0.6540643147598225
|
|
|
|
key: test_recall
|
|
value: [0.4 0.4 0.8 0.4 0.75 1. 0.25 0.6 0.6 0.2 ]
|
|
|
|
mean value: 0.54
|
|
|
|
key: train_recall
|
|
value: [0.76190476 0.71428571 0.71428571 0.69047619 0.65116279 0.62790698
|
|
0.69767442 0.69047619 0.69047619 0.78571429]
|
|
|
|
mean value: 0.7024363233665559
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.4 0.6 0.5 0.575 0.7 0.225 0.55 0.675 0.6 ]
|
|
|
|
mean value: 0.5325
|
|
|
|
key: train_roc_auc
|
|
value: [0.69047619 0.70238095 0.66666667 0.66666667 0.62320044 0.61157254
|
|
0.65836102 0.65919158 0.67081949 0.70681063]
|
|
|
|
mean value: 0.6656146179401993
|
|
|
|
key: test_jcc
|
|
value: [0.28571429 0.25 0.5 0.28571429 0.42857143 0.57142857
|
|
0.125 0.42857143 0.5 0.2 ]
|
|
|
|
mean value: 0.3575
|
|
|
|
key: train_jcc
|
|
value: [0.55172414 0.54545455 0.51724138 0.50877193 0.46666667 0.45
|
|
0.50847458 0.5 0.50877193 0.56896552]
|
|
|
|
mean value: 0.512607068252428
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00970221 0.00964284 0.0095191 0.01022768 0.00957704 0.01001263
|
|
0.0093708 0.00942683 0.00976539 0.00955868]
|
|
|
|
mean value: 0.00968031883239746
|
|
|
|
key: score_time
|
|
value: [0.00961828 0.00944138 0.00948215 0.00907469 0.00895667 0.00944567
|
|
0.00949264 0.00930762 0.00928617 0.00859022]
|
|
|
|
mean value: 0.00926954746246338
|
|
|
|
key: test_mcc
|
|
value: [ 0.6 0.2 0.2 0. 0.35 0.63245553
|
|
-0.31622777 0.35 0.31622777 0.31622777]
|
|
|
|
mean value: 0.2648683298050514
|
|
|
|
key: train_mcc
|
|
value: [0.66742381 0.71428571 0.64303944 0.5967624 0.67054264 0.64833694
|
|
0.67679754 0.62667015 0.62437683 0.64728682]
|
|
|
|
mean value: 0.6515522264881214
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.6 0.6 0.5 0.66666667 0.77777778
|
|
0.33333333 0.66666667 0.66666667 0.55555556]
|
|
|
|
mean value: 0.6166666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.83333333 0.85714286 0.82142857 0.79761905 0.83529412 0.82352941
|
|
0.83529412 0.81176471 0.81176471 0.82352941]
|
|
|
|
mean value: 0.8250700280112044
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.6 0.6 0.44444444 0.66666667 0.8
|
|
0.4 0.66666667 0.72727273 0.33333333]
|
|
|
|
mean value: 0.6038383838383838
|
|
|
|
key: train_fscore
|
|
value: [0.8372093 0.85714286 0.81927711 0.8045977 0.8372093 0.83146067
|
|
0.84782609 0.81818182 0.81395349 0.82352941]
|
|
|
|
mean value: 0.8290387750809622
|
|
|
|
key: test_precision
|
|
value: [0.8 0.6 0.6 0.5 0.6 0.66666667
|
|
0.33333333 0.75 0.66666667 1. ]
|
|
|
|
mean value: 0.6516666666666666
|
|
|
|
key: train_precision
|
|
value: [0.81818182 0.85714286 0.82926829 0.77777778 0.8372093 0.80434783
|
|
0.79591837 0.7826087 0.79545455 0.81395349]
|
|
|
|
mean value: 0.8111862971023669
|
|
|
|
key: test_recall
|
|
value: [0.8 0.6 0.6 0.4 0.75 1. 0.5 0.6 0.8 0.2 ]
|
|
|
|
mean value: 0.625
|
|
|
|
key: train_recall
|
|
value: [0.85714286 0.85714286 0.80952381 0.83333333 0.8372093 0.86046512
|
|
0.90697674 0.85714286 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8485603543743079
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.6 0.6 0.5 0.675 0.8 0.35 0.675 0.65 0.6 ]
|
|
|
|
mean value: 0.625
|
|
|
|
key: train_roc_auc
|
|
value: [0.83333333 0.85714286 0.82142857 0.79761905 0.83527132 0.8230897
|
|
0.83444075 0.81229236 0.8120155 0.82364341]
|
|
|
|
mean value: 0.8250276854928018
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.42857143 0.42857143 0.28571429 0.5 0.66666667
|
|
0.25 0.5 0.57142857 0.2 ]
|
|
|
|
mean value: 0.44976190476190475
|
|
|
|
key: train_jcc
|
|
value: [0.72 0.75 0.69387755 0.67307692 0.72 0.71153846
|
|
0.73584906 0.69230769 0.68627451 0.7 ]
|
|
|
|
mean value: 0.708292419435118
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.3987453 0.51468682 0.57014608 0.40633082 0.50504756 0.3882122
|
|
0.40108752 0.57440495 0.4462316 0.4448204 ]
|
|
|
|
mean value: 0.46497132778167727
|
|
|
|
key: score_time
|
|
value: [0.01203442 0.01194072 0.01197124 0.01195979 0.01194572 0.01197648
|
|
0.01195168 0.01194906 0.01208687 0.01194286]
|
|
|
|
mean value: 0.011975884437561035
|
|
|
|
key: test_mcc
|
|
value: [-0.21821789 -0.21821789 0.2 0.5 0.05976143 0.63245553
|
|
-0.15811388 0.47809144 -0.1 0.63245553]
|
|
|
|
mean value: 0.18082142747874252
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.4 0.4 0.6 0.7 0.55555556 0.77777778
|
|
0.44444444 0.66666667 0.44444444 0.77777778]
|
|
|
|
mean value: 0.5766666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.25 0.25 0.6 0.76923077 0.33333333 0.8
|
|
0.28571429 0.57142857 0.44444444 0.75 ]
|
|
|
|
mean value: 0.5054151404151405
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.33333333 0.33333333 0.6 0.625 0.5 0.66666667
|
|
0.33333333 1. 0.5 1. ]
|
|
|
|
mean value: 0.5891666666666666
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.2 0.2 0.6 1. 0.25 1. 0.25 0.4 0.4 0.6 ]
|
|
|
|
mean value: 0.49
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.4 0.4 0.6 0.7 0.525 0.8 0.425 0.7 0.45 0.8 ]
|
|
|
|
mean value: 0.58
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.14285714 0.14285714 0.42857143 0.625 0.2 0.66666667
|
|
0.16666667 0.4 0.28571429 0.6 ]
|
|
|
|
mean value: 0.36583333333333334
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01363611 0.01342154 0.01003838 0.01011848 0.00991964 0.00991988
|
|
0.00986767 0.01031065 0.01011705 0.00999522]
|
|
|
|
mean value: 0.010734462738037109
|
|
|
|
key: score_time
|
|
value: [0.01147652 0.00917077 0.0088315 0.0084331 0.00848675 0.00852704
|
|
0.00852036 0.00841403 0.0084002 0.00855517]
|
|
|
|
mean value: 0.008881545066833496
|
|
|
|
key: test_mcc
|
|
value: [1. 0.2 0.6 0.81649658 0.8 0.8
|
|
0.55 0.8 0.79056942 0.35 ]
|
|
|
|
mean value: 0.6707065995969821
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.6 0.8 0.9 0.88888889 0.88888889
|
|
0.77777778 0.88888889 0.88888889 0.66666667]
|
|
|
|
mean value: 0.83
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.6 0.8 0.88888889 0.88888889 0.88888889
|
|
0.75 0.88888889 0.90909091 0.66666667]
|
|
|
|
mean value: 0.8281313131313132
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.6 0.8 1. 0.8 0.8
|
|
0.75 1. 0.83333333 0.75 ]
|
|
|
|
mean value: 0.8333333333333334
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.6 0.8 0.8 1. 1. 0.75 0.8 1. 0.6 ]
|
|
|
|
mean value: 0.835
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.6 0.8 0.9 0.9 0.9 0.775 0.9 0.875 0.675]
|
|
|
|
mean value: 0.8325
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.42857143 0.66666667 0.8 0.8 0.8
|
|
0.6 0.8 0.83333333 0.5 ]
|
|
|
|
mean value: 0.7228571428571429
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.58
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08252668 0.0831151 0.08256292 0.08205557 0.08413506 0.08230066
|
|
0.08206129 0.08255148 0.08241248 0.08236241]
|
|
|
|
mean value: 0.08260836601257324
|
|
|
|
key: score_time
|
|
value: [0.0178237 0.0170083 0.01702189 0.01705647 0.01695824 0.01691341
|
|
0.01711273 0.01716065 0.01705647 0.01699781]
|
|
|
|
mean value: 0.0171109676361084
|
|
|
|
key: test_mcc
|
|
value: [-0.2 0. 0.65465367 0.5 0.35 -0.1
|
|
-0.35 0.47809144 0.35 0.47809144]
|
|
|
|
mean value: 0.2160836558175492
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.4 0.5 0.8 0.7 0.66666667 0.44444444
|
|
0.33333333 0.66666667 0.66666667 0.66666667]
|
|
|
|
mean value: 0.5844444444444444
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.44444444 0.83333333 0.76923077 0.66666667 0.44444444
|
|
0.25 0.57142857 0.66666667 0.57142857]
|
|
|
|
mean value: 0.5617643467643467
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.4 0.5 0.71428571 0.625 0.6 0.4
|
|
0.25 1. 0.75 1. ]
|
|
|
|
mean value: 0.6239285714285714
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.4 0.4 1. 1. 0.75 0.5 0.25 0.4 0.6 0.4 ]
|
|
|
|
mean value: 0.5700000000000001
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.4 0.5 0.8 0.7 0.675 0.45 0.325 0.7 0.675 0.7 ]
|
|
|
|
mean value: 0.5925
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.28571429 0.71428571 0.625 0.5 0.28571429
|
|
0.14285714 0.4 0.5 0.4 ]
|
|
|
|
mean value: 0.41035714285714286
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.2
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.008533 0.0085187 0.00847268 0.0083952 0.00864124 0.00853229
|
|
0.00878954 0.00884581 0.00939178 0.00857973]
|
|
|
|
mean value: 0.00866999626159668
|
|
|
|
key: score_time
|
|
value: [0.00860953 0.00861216 0.00846887 0.00854897 0.00849032 0.00868344
|
|
0.00847077 0.00867677 0.00852442 0.00837708]
|
|
|
|
mean value: 0.008546233177185059
|
|
|
|
key: test_mcc
|
|
value: [ 0. -0.21821789 0.81649658 0.40824829 0.31622777 0.
|
|
-0.31622777 0.15811388 0.55 0.47809144]
|
|
|
|
mean value: 0.2192732307897773
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.4 0.9 0.7 0.55555556 0.55555556
|
|
0.33333333 0.55555556 0.77777778 0.66666667]
|
|
|
|
mean value: 0.5944444444444444
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.28571429 0.25 0.90909091 0.72727273 0.66666667 0.
|
|
0.4 0.5 0.8 0.57142857]
|
|
|
|
mean value: 0.511017316017316
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 0.33333333 0.83333333 0.66666667 0.5 0.
|
|
0.33333333 0.66666667 0.8 1. ]
|
|
|
|
mean value: 0.5633333333333334
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.2 0.2 1. 0.8 1. 0. 0.5 0.4 0.8 0.4]
|
|
|
|
mean value: 0.53
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.4 0.9 0.7 0.6 0.5 0.35 0.575 0.775 0.7 ]
|
|
|
|
mean value: 0.6
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.16666667 0.14285714 0.83333333 0.57142857 0.5 0.
|
|
0.25 0.33333333 0.66666667 0.4 ]
|
|
|
|
mean value: 0.3864285714285714
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.03654218 1.05361056 1.0611093 1.02847314 1.03847742 1.04320693
|
|
1.07000065 1.04533601 1.04591656 1.09409451]
|
|
|
|
mean value: 1.0516767263412476
|
|
|
|
key: score_time
|
|
value: [0.0878253 0.08734083 0.08673406 0.08699322 0.08708954 0.08753395
|
|
0.08909678 0.08885121 0.08853221 0.08780479]
|
|
|
|
mean value: 0.08778018951416015
|
|
|
|
key: test_mcc
|
|
value: [0.6 0.2 0.81649658 0.65465367 0.35 0.47809144
|
|
0.1 0.15811388 0.35 0.47809144]
|
|
|
|
mean value: 0.4185447022111637
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.6 0.9 0.8 0.66666667 0.66666667
|
|
0.55555556 0.55555556 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6877777777777778
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.6 0.90909091 0.83333333 0.66666667 0.72727273
|
|
0.5 0.5 0.66666667 0.57142857]
|
|
|
|
mean value: 0.6774458874458874
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8 0.6 0.83333333 0.71428571 0.6 0.57142857
|
|
0.5 0.66666667 0.75 1. ]
|
|
|
|
mean value: 0.7035714285714285
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.8 0.6 1. 1. 0.75 1. 0.5 0.4 0.6 0.4 ]
|
|
|
|
mean value: 0.705
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.6 0.9 0.8 0.675 0.7 0.55 0.575 0.675 0.7 ]
|
|
|
|
mean value: 0.6975
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.42857143 0.83333333 0.71428571 0.5 0.57142857
|
|
0.33333333 0.33333333 0.5 0.4 ]
|
|
|
|
mean value: 0.5280952380952381
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.44
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.77983856 0.84952569 0.85903978 0.84136844 0.89900446 0.86742282
|
|
0.89208865 0.84963059 0.90437579 0.90974164]
|
|
|
|
mean value: 0.8652036428451538
|
|
|
|
key: score_time
|
|
value: [0.20476985 0.22864652 0.22323489 0.16918874 0.19943976 0.19233608
|
|
0.15055943 0.21285725 0.13039494 0.15691781]
|
|
|
|
mean value: 0.18683452606201173
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.2 0.2 0.65465367 0.35 0.47809144
|
|
0.1 0.15811388 0.1 0.47809144]
|
|
|
|
mean value: 0.3535447022111637
|
|
|
|
key: train_mcc
|
|
value: [0.8104432 0.9047619 0.81322028 0.83356964 0.88261351 0.83554817
|
|
0.81173865 0.83554817 0.8596795 0.88444936]
|
|
|
|
mean value: 0.8471572396522087
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.6 0.6 0.8 0.66666667 0.66666667
|
|
0.55555556 0.55555556 0.55555556 0.66666667]
|
|
|
|
mean value: 0.6566666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.9047619 0.95238095 0.9047619 0.91666667 0.94117647 0.91764706
|
|
0.90588235 0.91764706 0.92941176 0.94117647]
|
|
|
|
mean value: 0.9231512605042017
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.6 0.6 0.83333333 0.66666667 0.72727273
|
|
0.5 0.5 0.6 0.57142857]
|
|
|
|
mean value: 0.6507792207792208
|
|
|
|
key: train_fscore
|
|
value: [0.90243902 0.95238095 0.9 0.91764706 0.94117647 0.91764706
|
|
0.90697674 0.91764706 0.92682927 0.9382716 ]
|
|
|
|
mean value: 0.922101524124702
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.6 0.6 0.71428571 0.6 0.57142857
|
|
0.5 0.66666667 0.6 1. ]
|
|
|
|
mean value: 0.6685714285714286
|
|
|
|
key: train_precision
|
|
value: [0.925 0.95238095 0.94736842 0.90697674 0.95238095 0.92857143
|
|
0.90697674 0.90697674 0.95 0.97435897]
|
|
|
|
mean value: 0.9350990961303078
|
|
|
|
key: test_recall
|
|
value: [1. 0.6 0.6 1. 0.75 1. 0.5 0.4 0.6 0.4 ]
|
|
|
|
mean value: 0.685
|
|
|
|
key: train_recall
|
|
value: [0.88095238 0.95238095 0.85714286 0.92857143 0.93023256 0.90697674
|
|
0.90697674 0.92857143 0.9047619 0.9047619 ]
|
|
|
|
mean value: 0.9101328903654485
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.6 0.6 0.8 0.675 0.7 0.55 0.575 0.55 0.7 ]
|
|
|
|
mean value: 0.665
|
|
|
|
key: train_roc_auc
|
|
value: [0.9047619 0.95238095 0.9047619 0.91666667 0.94130676 0.91777409
|
|
0.90586932 0.91777409 0.92912514 0.94075305]
|
|
|
|
mean value: 0.9231173864894795
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.42857143 0.42857143 0.71428571 0.5 0.57142857
|
|
0.33333333 0.33333333 0.42857143 0.4 ]
|
|
|
|
mean value: 0.4971428571428571
|
|
|
|
key: train_jcc
|
|
value: [0.82222222 0.90909091 0.81818182 0.84782609 0.88888889 0.84782609
|
|
0.82978723 0.84782609 0.86363636 0.88372093]
|
|
|
|
mean value: 0.8559006627164878
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0094471 0.00841928 0.00873494 0.00907087 0.0085907 0.00852704
|
|
0.00832558 0.00844026 0.00845122 0.00849319]
|
|
|
|
mean value: 0.00865001678466797
|
|
|
|
key: score_time
|
|
value: [0.00841236 0.00860381 0.00898695 0.00929737 0.0089252 0.00838995
|
|
0.00840497 0.00834346 0.00855756 0.00841975]
|
|
|
|
mean value: 0.008634138107299804
|
|
|
|
key: test_mcc
|
|
value: [ 0.65465367 0. 0.2 -0.40824829 0.1 -0.1
|
|
0.1 -0.63245553 -0.1 0.31622777]
|
|
|
|
mean value: 0.013017761422727625
|
|
|
|
key: train_mcc
|
|
value: [0.40579843 0.40487668 0.47619048 0.42857143 0.39309028 0.45872583
|
|
0.43521595 0.459447 0.48252979 0.50666948]
|
|
|
|
mean value: 0.4451115339233095
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.5 0.6 0.3 0.55555556 0.44444444
|
|
0.55555556 0.22222222 0.44444444 0.55555556]
|
|
|
|
mean value: 0.49777777777777776
|
|
|
|
key: train_accuracy
|
|
value: [0.70238095 0.70238095 0.73809524 0.71428571 0.69411765 0.72941176
|
|
0.71764706 0.72941176 0.74117647 0.75294118]
|
|
|
|
mean value: 0.7221848739495799
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.44444444 0.6 0.36363636 0.5 0.44444444
|
|
0.5 0.36363636 0.44444444 0.33333333]
|
|
|
|
mean value: 0.47439393939393937
|
|
|
|
key: train_fscore
|
|
value: [0.69135802 0.70588235 0.73809524 0.71428571 0.675 0.73563218
|
|
0.72093023 0.71604938 0.73170732 0.74074074]
|
|
|
|
mean value: 0.7169681187009633
|
|
|
|
key: test_precision
|
|
value: [1. 0.5 0.6 0.33333333 0.5 0.4
|
|
0.5 0.33333333 0.5 1. ]
|
|
|
|
mean value: 0.5666666666666667
|
|
|
|
key: train_precision
|
|
value: [0.71794872 0.69767442 0.73809524 0.71428571 0.72972973 0.72727273
|
|
0.72093023 0.74358974 0.75 0.76923077]
|
|
|
|
mean value: 0.7308757291315431
|
|
|
|
key: test_recall
|
|
value: [0.6 0.4 0.6 0.4 0.5 0.5 0.5 0.4 0.4 0.2]
|
|
|
|
mean value: 0.45
|
|
|
|
key: train_recall
|
|
value: [0.66666667 0.71428571 0.73809524 0.71428571 0.62790698 0.74418605
|
|
0.72093023 0.69047619 0.71428571 0.71428571]
|
|
|
|
mean value: 0.7045404208194905
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.5 0.6 0.3 0.55 0.45 0.55 0.2 0.45 0.6 ]
|
|
|
|
mean value: 0.5
|
|
|
|
key: train_roc_auc
|
|
value: [0.70238095 0.70238095 0.73809524 0.71428571 0.69490587 0.72923588
|
|
0.71760797 0.72895903 0.74086379 0.75249169]
|
|
|
|
mean value: 0.7221207087486158
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.28571429 0.42857143 0.22222222 0.33333333 0.28571429
|
|
0.33333333 0.22222222 0.28571429 0.2 ]
|
|
|
|
mean value: 0.3196825396825397
|
|
|
|
key: train_jcc
|
|
value: [0.52830189 0.54545455 0.58490566 0.55555556 0.50943396 0.58181818
|
|
0.56363636 0.55769231 0.57692308 0.58823529]
|
|
|
|
mean value: 0.5591956834631641
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.1128006 0.03412747 0.03835845 0.07483363 0.03655577 0.03809452
|
|
0.03960681 0.05563092 0.04077697 0.03882813]
|
|
|
|
mean value: 0.05096132755279541
|
|
|
|
key: score_time
|
|
value: [0.01064277 0.01110005 0.01017284 0.01070666 0.01080132 0.01021099
|
|
0.01077867 0.01061893 0.01028061 0.01011896]
|
|
|
|
mean value: 0.010543179512023926
|
|
|
|
key: test_mcc
|
|
value: [1. 0.40824829 0.6 1. 0.55 0.8
|
|
0.79056942 0.55 0.79056942 0.79056942]
|
|
|
|
mean value: 0.7279956535590147
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.7 0.8 1. 0.77777778 0.88888889
|
|
0.88888889 0.77777778 0.88888889 0.88888889]
|
|
|
|
mean value: 0.861111111111111
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.66666667 0.8 1. 0.75 0.88888889
|
|
0.85714286 0.8 0.90909091 0.90909091]
|
|
|
|
mean value: 0.8580880230880231
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.75 0.8 1. 0.75 0.8
|
|
1. 0.8 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8566666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.6 0.8 1. 0.75 1. 0.75 0.8 1. 1. ]
|
|
|
|
mean value: 0.87
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.7 0.8 1. 0.775 0.9 0.875 0.775 0.875 0.875]
|
|
|
|
mean value: 0.8575
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.5 0.66666667 1. 0.6 0.8
|
|
0.75 0.66666667 0.83333333 0.83333333]
|
|
|
|
mean value: 0.765
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.8
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02367091 0.03436327 0.01810193 0.03793502 0.04143929 0.04502439
|
|
0.03467965 0.04110503 0.04385448 0.04827666]
|
|
|
|
mean value: 0.036845064163208006
|
|
|
|
key: score_time
|
|
value: [0.02120757 0.01171589 0.01151252 0.02240467 0.0230062 0.01178575
|
|
0.02076721 0.02186203 0.02004266 0.02183557]
|
|
|
|
mean value: 0.01861400604248047
|
|
|
|
key: test_mcc
|
|
value: [ 0.81649658 0.5 0. 0.40824829 -0.55 0.15811388
|
|
0.55 0.47809144 0.15811388 0.31622777]
|
|
|
|
mean value: 0.2835291847159022
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 0.97673145
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9976731451835319
|
|
|
|
key: test_accuracy
|
|
value: [0.9 0.7 0.5 0.7 0.22222222 0.55555556
|
|
0.77777778 0.66666667 0.55555556 0.66666667]
|
|
|
|
mean value: 0.6244444444444445
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 0.98823529
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9988235294117647
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.57142857 0.54545455 0.72727273 0.22222222 0.6
|
|
0.75 0.57142857 0.5 0.72727273]
|
|
|
|
mean value: 0.6124170274170274
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 0.98850575
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9988505747126437
|
|
|
|
key: test_precision
|
|
value: [0.83333333 1. 0.5 0.66666667 0.2 0.5
|
|
0.75 1. 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6783333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 0.97727273
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9977272727272727
|
|
|
|
key: test_recall
|
|
value: [1. 0.4 0.6 0.8 0.25 0.75 0.75 0.4 0.4 0.8 ]
|
|
|
|
mean value: 0.615
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.7 0.5 0.7 0.225 0.575 0.775 0.7 0.575 0.65 ]
|
|
|
|
mean value: 0.63
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 0.98809524
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9988095238095238
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.4 0.375 0.57142857 0.125 0.42857143
|
|
0.6 0.4 0.33333333 0.57142857]
|
|
|
|
mean value: 0.4638095238095238
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 0.97727273
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9977272727272727
|
|
|
|
MCC on Blind test: -0.13
|
|
|
|
Accuracy on Blind test: 0.42
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02005243 0.00878334 0.00833106 0.00821447 0.008322 0.00838065
|
|
0.00840259 0.00831246 0.00820088 0.00836849]
|
|
|
|
mean value: 0.009536838531494141
|
|
|
|
key: score_time
|
|
value: [0.01012063 0.00871801 0.00828266 0.00834131 0.00829244 0.00826645
|
|
0.00829434 0.00833344 0.00835776 0.00830913]
|
|
|
|
mean value: 0.008531618118286132
|
|
|
|
key: test_mcc
|
|
value: [ 0.6 0.2 0.65465367 0. 0.63245553 0.47809144
|
|
-0.1 0.15811388 0.1 0.47809144]
|
|
|
|
mean value: 0.3201405973217587
|
|
|
|
key: train_mcc
|
|
value: [0.42905817 0.50128041 0.45353942 0.50014178 0.43521595 0.43538607
|
|
0.48252979 0.43601115 0.50748822 0.46034451]
|
|
|
|
mean value: 0.4640995464256339
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.6 0.8 0.5 0.77777778 0.66666667
|
|
0.44444444 0.55555556 0.55555556 0.66666667]
|
|
|
|
mean value: 0.6366666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.71428571 0.75 0.72619048 0.75 0.71764706 0.71764706
|
|
0.74117647 0.71764706 0.75294118 0.72941176]
|
|
|
|
mean value: 0.7316946778711485
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.6 0.83333333 0.54545455 0.8 0.72727273
|
|
0.44444444 0.5 0.6 0.57142857]
|
|
|
|
mean value: 0.6421933621933622
|
|
|
|
key: train_fscore
|
|
value: [0.72093023 0.75862069 0.73563218 0.75294118 0.72093023 0.72727273
|
|
0.75 0.72093023 0.75862069 0.73563218]
|
|
|
|
mean value: 0.738151034854417
|
|
|
|
key: test_precision
|
|
value: [0.8 0.6 0.71428571 0.5 0.66666667 0.57142857
|
|
0.4 0.66666667 0.6 1. ]
|
|
|
|
mean value: 0.6519047619047619
|
|
|
|
key: train_precision
|
|
value: [0.70454545 0.73333333 0.71111111 0.74418605 0.72093023 0.71111111
|
|
0.73333333 0.70454545 0.73333333 0.71111111]
|
|
|
|
mean value: 0.720754052149401
|
|
|
|
key: test_recall
|
|
value: [0.8 0.6 1. 0.6 1. 1. 0.5 0.4 0.6 0.4]
|
|
|
|
mean value: 0.69
|
|
|
|
key: train_recall
|
|
value: [0.73809524 0.78571429 0.76190476 0.76190476 0.72093023 0.74418605
|
|
0.76744186 0.73809524 0.78571429 0.76190476]
|
|
|
|
mean value: 0.7565891472868217
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.6 0.8 0.5 0.8 0.7 0.45 0.575 0.55 0.7 ]
|
|
|
|
mean value: 0.6475
|
|
|
|
key: train_roc_auc
|
|
value: [0.71428571 0.75 0.72619048 0.75 0.71760797 0.71733112
|
|
0.74086379 0.71788483 0.75332226 0.72978959]
|
|
|
|
mean value: 0.7317275747508306
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.42857143 0.71428571 0.375 0.66666667 0.57142857
|
|
0.28571429 0.33333333 0.42857143 0.4 ]
|
|
|
|
mean value: 0.4870238095238095
|
|
|
|
key: train_jcc
|
|
value: [0.56363636 0.61111111 0.58181818 0.60377358 0.56363636 0.57142857
|
|
0.6 0.56363636 0.61111111 0.58181818]
|
|
|
|
mean value: 0.5851969833101909
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00940657 0.01323438 0.01276374 0.01335931 0.01316428 0.01373863
|
|
0.01285005 0.01365685 0.01284933 0.01359248]
|
|
|
|
mean value: 0.01286156177520752
|
|
|
|
key: score_time
|
|
value: [0.00832725 0.01156807 0.01161599 0.01167846 0.01164007 0.01132965
|
|
0.01127243 0.01137853 0.01136231 0.01132703]
|
|
|
|
mean value: 0.011149978637695313
|
|
|
|
key: test_mcc
|
|
value: [ 0.33333333 0.2 0.40824829 0.5 0.05976143 0.31622777
|
|
-0.31622777 0.31622777 0.47809144 0.5976143 ]
|
|
|
|
mean value: 0.2893276568681708
|
|
|
|
key: train_mcc
|
|
value: [0.72613547 0.90578946 0.90889326 0.93094934 0.95401677 0.82687419
|
|
0.72991065 0.65607137 0.84656167 0.58402506]
|
|
|
|
mean value: 0.8069227241270543
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.6 0.7 0.7 0.55555556 0.66666667
|
|
0.44444444 0.66666667 0.66666667 0.77777778]
|
|
|
|
mean value: 0.6377777777777778
|
|
|
|
key: train_accuracy
|
|
value: [0.8452381 0.95238095 0.95238095 0.96428571 0.97647059 0.90588235
|
|
0.84705882 0.8 0.91764706 0.75294118]
|
|
|
|
mean value: 0.8914285714285715
|
|
|
|
key: test_fscore
|
|
value: [0.33333333 0.6 0.72727273 0.76923077 0.33333333 0.57142857
|
|
0. 0.72727273 0.57142857 0.83333333]
|
|
|
|
mean value: 0.5466633366633367
|
|
|
|
key: train_fscore
|
|
value: [0.81690141 0.95348837 0.95 0.96551724 0.97619048 0.8974359
|
|
0.82191781 0.83168317 0.90909091 0.8 ]
|
|
|
|
mean value: 0.892222528117633
|
|
|
|
key: test_precision
|
|
value: [1. 0.6 0.66666667 0.625 0.5 0.66666667
|
|
0. 0.66666667 1. 0.71428571]
|
|
|
|
mean value: 0.6439285714285714
|
|
|
|
key: train_precision
|
|
value: [1. 0.93181818 1. 0.93333333 1. 1.
|
|
1. 0.71186441 1. 0.66666667]
|
|
|
|
mean value: 0.9243682588597842
|
|
|
|
key: test_recall
|
|
value: [0.2 0.6 0.8 1. 0.25 0.5 0. 0.8 0.4 1. ]
|
|
|
|
mean value: 0.555
|
|
|
|
key: train_recall
|
|
value: [0.69047619 0.97619048 0.9047619 1. 0.95348837 0.81395349
|
|
0.69767442 1. 0.83333333 1. ]
|
|
|
|
mean value: 0.8869878183831672
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 0.6 0.7 0.7 0.525 0.65 0.4 0.65 0.7 0.75 ]
|
|
|
|
mean value: 0.6275000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [0.8452381 0.95238095 0.95238095 0.96428571 0.97674419 0.90697674
|
|
0.84883721 0.80232558 0.91666667 0.75581395]
|
|
|
|
mean value: 0.8921650055370985
|
|
|
|
key: test_jcc
|
|
value: [0.2 0.42857143 0.57142857 0.625 0.2 0.4
|
|
0. 0.57142857 0.4 0.71428571]
|
|
|
|
mean value: 0.4110714285714286
|
|
|
|
key: train_jcc
|
|
value: [0.69047619 0.91111111 0.9047619 0.93333333 0.95348837 0.81395349
|
|
0.69767442 0.71186441 0.83333333 0.66666667]
|
|
|
|
mean value: 0.8116663225531968
|
|
|
|
MCC on Blind test: 0.56
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01269197 0.01208448 0.01249409 0.01239371 0.01234913 0.01236677
|
|
0.01309586 0.0125196 0.02947712 0.01219749]
|
|
|
|
mean value: 0.014167022705078126
|
|
|
|
key: score_time
|
|
value: [0.01131773 0.01128864 0.01128244 0.01131725 0.01122642 0.01126981
|
|
0.01144171 0.01194239 0.01196551 0.01130867]
|
|
|
|
mean value: 0.011436057090759278
|
|
|
|
key: test_mcc
|
|
value: [0.33333333 0.2 0.65465367 0.21821789 0.1 0.31622777
|
|
0.8 0.31622777 0.1 0.47809144]
|
|
|
|
mean value: 0.35167518700447364
|
|
|
|
key: train_mcc
|
|
value: [0.88320834 0.81791288 1. 0.80439967 0.86728691 0.44930828
|
|
1. 0.86248553 0.97673145 0.67169212]
|
|
|
|
mean value: 0.8333025187012472
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.6 0.8 0.6 0.55555556 0.55555556
|
|
0.88888889 0.55555556 0.55555556 0.66666667]
|
|
|
|
mean value: 0.6377777777777778
|
|
|
|
key: train_accuracy
|
|
value: [0.94047619 0.9047619 1. 0.89285714 0.92941176 0.67058824
|
|
1. 0.92941176 0.98823529 0.81176471]
|
|
|
|
mean value: 0.9067507002801121
|
|
|
|
key: test_fscore
|
|
value: [0.33333333 0.6 0.83333333 0.5 0.5 0.66666667
|
|
0.88888889 0.33333333 0.6 0.57142857]
|
|
|
|
mean value: 0.5826984126984127
|
|
|
|
key: train_fscore
|
|
value: [0.9382716 0.91111111 1. 0.88 0.93478261 0.75438596
|
|
1. 0.925 0.98795181 0.76470588]
|
|
|
|
mean value: 0.9096208979239172
|
|
|
|
key: test_precision
|
|
value: [1. 0.6 0.71428571 0.66666667 0.5 0.5
|
|
0.8 1. 0.6 1. ]
|
|
|
|
mean value: 0.7380952380952381
|
|
|
|
key: train_precision
|
|
value: [0.97435897 0.85416667 1. 1. 0.87755102 0.6056338
|
|
1. 0.97368421 1. 1. ]
|
|
|
|
mean value: 0.9285394674777021
|
|
|
|
key: test_recall
|
|
value: [0.2 0.6 1. 0.4 0.5 1. 1. 0.2 0.6 0.4]
|
|
|
|
mean value: 0.59
|
|
|
|
key: train_recall
|
|
value: [0.9047619 0.97619048 1. 0.78571429 1. 1.
|
|
1. 0.88095238 0.97619048 0.61904762]
|
|
|
|
mean value: 0.9142857142857143
|
|
|
|
key: test_roc_auc
|
|
value: [0.6 0.6 0.8 0.6 0.55 0.6 0.9 0.6 0.55 0.7 ]
|
|
|
|
mean value: 0.65
|
|
|
|
key: train_roc_auc
|
|
value: [0.94047619 0.9047619 1. 0.89285714 0.92857143 0.66666667
|
|
1. 0.92884828 0.98809524 0.80952381]
|
|
|
|
mean value: 0.9059800664451827
|
|
|
|
key: test_jcc
|
|
value: [0.2 0.42857143 0.71428571 0.33333333 0.33333333 0.5
|
|
0.8 0.2 0.42857143 0.4 ]
|
|
|
|
mean value: 0.4338095238095238
|
|
|
|
key: train_jcc
|
|
value: [0.88372093 0.83673469 1. 0.78571429 0.87755102 0.6056338
|
|
1. 0.86046512 0.97619048 0.61904762]
|
|
|
|
mean value: 0.8445057944566624
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0971725 0.08120632 0.08149838 0.08213544 0.08309603 0.08951998
|
|
0.08940911 0.08906174 0.08926892 0.08266449]
|
|
|
|
mean value: 0.08650329113006591
|
|
|
|
key: score_time
|
|
value: [0.01469111 0.01460528 0.01462483 0.01497602 0.0149827 0.0161984
|
|
0.0162065 0.01581621 0.01595116 0.01477909]
|
|
|
|
mean value: 0.01528313159942627
|
|
|
|
key: test_mcc
|
|
value: [1. 0.6 0.81649658 1. 1. 1.
|
|
0.79056942 0.55 0.79056942 0.79056942]
|
|
|
|
mean value: 0.833820482605401
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.8 0.9 1. 1. 1.
|
|
0.88888889 0.77777778 0.88888889 0.88888889]
|
|
|
|
mean value: 0.9144444444444444
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.8 0.90909091 1. 1. 1.
|
|
0.85714286 0.8 0.90909091 0.90909091]
|
|
|
|
mean value: 0.9184415584415585
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.8 0.83333333 1. 1. 1.
|
|
1. 0.8 0.83333333 0.83333333]
|
|
|
|
mean value: 0.91
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.8 1. 1. 1. 1. 0.75 0.8 1. 1. ]
|
|
|
|
mean value: 0.935
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.8 0.9 1. 1. 1. 0.875 0.775 0.875 0.875]
|
|
|
|
mean value: 0.91
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.66666667 0.83333333 1. 1. 1.
|
|
0.75 0.66666667 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8583333333333334
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03284931 0.03265047 0.03695679 0.04043913 0.02679896 0.02754045
|
|
0.03230286 0.03196168 0.03647447 0.02691197]
|
|
|
|
mean value: 0.03248860836029053
|
|
|
|
key: score_time
|
|
value: [0.02001953 0.01964521 0.03343034 0.01598549 0.0166657 0.01614904
|
|
0.02222133 0.02362394 0.04111886 0.02959847]
|
|
|
|
mean value: 0.023845791816711426
|
|
|
|
key: test_mcc
|
|
value: [1. 0.2 1. 1. 0.55 0.8
|
|
0.5976143 0.79056942 0.79056942 0.15811388]
|
|
|
|
mean value: 0.6886867017759806
|
|
|
|
key: train_mcc
|
|
value: [0.95346259 1. 0.95238095 1. 0.97673145 0.97673145
|
|
0.97673145 0.95293466 0.95293466 0.97673145]
|
|
|
|
mean value: 0.9718638673441794
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.6 1. 1. 0.77777778 0.88888889
|
|
0.77777778 0.88888889 0.88888889 0.55555556]
|
|
|
|
mean value: 0.8377777777777777
|
|
|
|
key: train_accuracy
|
|
value: [0.97619048 1. 0.97619048 1. 0.98823529 0.98823529
|
|
0.98823529 0.97647059 0.97647059 0.98823529]
|
|
|
|
mean value: 0.9858263305322129
|
|
|
|
key: test_fscore
|
|
value: [1. 0.6 1. 1. 0.75 0.88888889
|
|
0.66666667 0.90909091 0.90909091 0.5 ]
|
|
|
|
mean value: 0.8223737373737374
|
|
|
|
key: train_fscore
|
|
value: [0.97674419 1. 0.97619048 1. 0.98850575 0.98850575
|
|
0.98850575 0.97619048 0.97619048 0.98795181]
|
|
|
|
mean value: 0.9858784663226167
|
|
|
|
key: test_precision
|
|
value: [1. 0.6 1. 1. 0.75 0.8
|
|
1. 0.83333333 0.83333333 0.66666667]
|
|
|
|
mean value: 0.8483333333333334
|
|
|
|
key: train_precision
|
|
value: [0.95454545 1. 0.97619048 1. 0.97727273 0.97727273
|
|
0.97727273 0.97619048 0.97619048 1. ]
|
|
|
|
mean value: 0.9814935064935065
|
|
|
|
key: test_recall
|
|
value: [1. 0.6 1. 1. 0.75 1. 0.5 1. 1. 0.4 ]
|
|
|
|
mean value: 0.825
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.97619048 1. 1. 1.
|
|
1. 0.97619048 0.97619048 0.97619048]
|
|
|
|
mean value: 0.9904761904761905
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.6 1. 1. 0.775 0.9 0.75 0.875 0.875 0.575]
|
|
|
|
mean value: 0.835
|
|
|
|
key: train_roc_auc
|
|
value: [0.97619048 1. 0.97619048 1. 0.98809524 0.98809524
|
|
0.98809524 0.97646733 0.97646733 0.98809524]
|
|
|
|
mean value: 0.9857696566998893
|
|
|
|
key: test_jcc
|
|
value: [1. 0.42857143 1. 1. 0.6 0.8
|
|
0.5 0.83333333 0.83333333 0.33333333]
|
|
|
|
mean value: 0.7328571428571429
|
|
|
|
key: train_jcc
|
|
value: [0.95454545 1. 0.95348837 1. 0.97727273 0.97727273
|
|
0.97727273 0.95348837 0.95348837 0.97619048]
|
|
|
|
mean value: 0.9723019228833183
|
|
|
|
MCC on Blind test: 0.8
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0129745 0.0150969 0.01496792 0.01572418 0.01578259 0.01588988
|
|
0.01592541 0.01584125 0.01593757 0.01588011]
|
|
|
|
mean value: 0.01540203094482422
|
|
|
|
key: score_time
|
|
value: [0.01132321 0.01115489 0.01170158 0.01171732 0.0116384 0.0116837
|
|
0.01170039 0.0117197 0.01168013 0.011693 ]
|
|
|
|
mean value: 0.01160123348236084
|
|
|
|
key: test_mcc
|
|
value: [-0.40824829 -0.21821789 0.6 0.65465367 0.35 0.1
|
|
-0.31622777 0.35 0.1 0.15811388]
|
|
|
|
mean value: 0.13700736069997027
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.3 0.4 0.8 0.8 0.66666667 0.55555556
|
|
0.33333333 0.66666667 0.55555556 0.55555556]
|
|
|
|
mean value: 0.5633333333333334
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.22222222 0.25 0.8 0.83333333 0.66666667 0.5
|
|
0.4 0.66666667 0.6 0.5 ]
|
|
|
|
mean value: 0.5438888888888889
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.25 0.33333333 0.8 0.71428571 0.6 0.5
|
|
0.33333333 0.75 0.6 0.66666667]
|
|
|
|
mean value: 0.5547619047619048
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.2 0.2 0.8 1. 0.75 0.5 0.5 0.6 0.6 0.4 ]
|
|
|
|
mean value: 0.555
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.3 0.4 0.8 0.8 0.675 0.55 0.35 0.675 0.55 0.575]
|
|
|
|
mean value: 0.5675
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.125 0.14285714 0.66666667 0.71428571 0.5 0.33333333
|
|
0.25 0.5 0.42857143 0.33333333]
|
|
|
|
mean value: 0.3994047619047619
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.20045638 0.19111204 0.18844604 0.19073153 0.19398952 0.19342637
|
|
0.18221998 0.19567132 0.17616749 0.19407582]
|
|
|
|
mean value: 0.19062964916229247
|
|
|
|
key: score_time
|
|
value: [0.00931382 0.00921655 0.00914001 0.00976086 0.0097115 0.00902557
|
|
0.00900292 0.00912833 0.00960612 0.00914884]
|
|
|
|
mean value: 0.009305453300476075
|
|
|
|
key: test_mcc
|
|
value: [1. 0.2 0.6 1. 0.31622777 0.8
|
|
0.55 0.55 0.79056942 0.15811388]
|
|
|
|
mean value: 0.5964911064067352
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.6 0.8 1. 0.66666667 0.88888889
|
|
0.77777778 0.77777778 0.88888889 0.55555556]
|
|
|
|
mean value: 0.7955555555555556
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.6 0.8 1. 0.57142857 0.88888889
|
|
0.75 0.8 0.90909091 0.5 ]
|
|
|
|
mean value: 0.7819408369408369
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.6 0.8 1. 0.66666667 0.8
|
|
0.75 0.8 0.83333333 0.66666667]
|
|
|
|
mean value: 0.7916666666666666
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.6 0.8 1. 0.5 1. 0.75 0.8 1. 0.4 ]
|
|
|
|
mean value: 0.785
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.6 0.8 1. 0.65 0.9 0.775 0.775 0.875 0.575]
|
|
|
|
mean value: 0.795
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.42857143 0.66666667 1. 0.4 0.8
|
|
0.6 0.66666667 0.83333333 0.33333333]
|
|
|
|
mean value: 0.6728571428571428
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01250792 0.01356506 0.01387882 0.02099538 0.02247381 0.01404572
|
|
0.01402068 0.01413226 0.01412964 0.02106595]
|
|
|
|
mean value: 0.016081523895263673
|
|
|
|
key: score_time
|
|
value: [0.01168299 0.01167083 0.0117135 0.02127814 0.01165938 0.01172686
|
|
0.01444626 0.01430297 0.0116024 0.01208735]
|
|
|
|
mean value: 0.01321706771850586
|
|
|
|
key: test_mcc
|
|
value: [-0.40824829 0.2 0.2 -0.40824829 -0.31622777 0.31622777
|
|
0.15811388 -0.1 0.1 -0.1 ]
|
|
|
|
mean value: -0.03583826979193071
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.3 0.6 0.6 0.3 0.33333333 0.66666667
|
|
0.55555556 0.44444444 0.55555556 0.44444444]
|
|
|
|
mean value: 0.48
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.22222222 0.6 0.6 0.36363636 0.4 0.57142857
|
|
0.6 0.44444444 0.6 0.44444444]
|
|
|
|
mean value: 0.4846176046176046
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.25 0.6 0.6 0.33333333 0.33333333 0.66666667
|
|
0.5 0.5 0.6 0.5 ]
|
|
|
|
mean value: 0.48833333333333334
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.2 0.6 0.6 0.4 0.5 0.5 0.75 0.4 0.6 0.4 ]
|
|
|
|
mean value: 0.495
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.3 0.6 0.6 0.3 0.35 0.65 0.575 0.45 0.55 0.45 ]
|
|
|
|
mean value: 0.4825
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.125 0.42857143 0.42857143 0.22222222 0.25 0.4
|
|
0.42857143 0.28571429 0.42857143 0.28571429]
|
|
|
|
mean value: 0.3282936507936508
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02888322 0.03299332 0.03313494 0.0310123 0.03189206 0.03178954
|
|
0.029737 0.03184295 0.03174567 0.03208899]
|
|
|
|
mean value: 0.031511998176574706
|
|
|
|
key: score_time
|
|
value: [0.02127457 0.0222466 0.02146268 0.02396894 0.0198679 0.02113128
|
|
0.01153922 0.02148414 0.01980829 0.01768708]
|
|
|
|
mean value: 0.02004706859588623
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.21821789 0.65465367 0.81649658 0.31622777 0.35
|
|
0.55 0.8 0.35 0.79056942]
|
|
|
|
mean value: 0.5500818993638605
|
|
|
|
key: train_mcc
|
|
value: [0.95238095 0.95346259 0.97646729 0.95238095 1. 0.97673145
|
|
0.92967885 0.95396693 1. 1. ]
|
|
|
|
mean value: 0.9695069019413809
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.6 0.8 0.9 0.66666667 0.66666667
|
|
0.77777778 0.88888889 0.66666667 0.88888889]
|
|
|
|
mean value: 0.7655555555555555
|
|
|
|
key: train_accuracy
|
|
value: [0.97619048 0.97619048 0.98809524 0.97619048 1. 0.98823529
|
|
0.96470588 0.97647059 1. 1. ]
|
|
|
|
mean value: 0.9846078431372549
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.5 0.83333333 0.90909091 0.57142857 0.66666667
|
|
0.75 0.88888889 0.66666667 0.90909091]
|
|
|
|
mean value: 0.7445165945165945
|
|
|
|
key: train_fscore
|
|
value: [0.97619048 0.97560976 0.98795181 0.97619048 1. 0.98850575
|
|
0.96470588 0.97560976 1. 1. ]
|
|
|
|
mean value: 0.9844763901284368
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 0.71428571 0.83333333 0.66666667 0.6
|
|
0.75 1. 0.75 0.83333333]
|
|
|
|
mean value: 0.7814285714285715
|
|
|
|
key: train_precision
|
|
value: [0.97619048 1. 1. 0.97619048 1. 0.97727273
|
|
0.97619048 1. 1. 1. ]
|
|
|
|
mean value: 0.9905844155844156
|
|
|
|
key: test_recall
|
|
value: [0.6 0.4 1. 1. 0.5 0.75 0.75 0.8 0.6 1. ]
|
|
|
|
mean value: 0.74
|
|
|
|
key: train_recall
|
|
value: [0.97619048 0.95238095 0.97619048 0.97619048 1. 1.
|
|
0.95348837 0.95238095 1. 1. ]
|
|
|
|
mean value: 0.9786821705426356
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.6 0.8 0.9 0.65 0.675 0.775 0.9 0.675 0.875]
|
|
|
|
mean value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_7030.py:176: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_7030.py:179: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
0.765
|
|
|
|
key: train_roc_auc
|
|
value: [0.97619048 0.97619048 0.98809524 0.97619048 1. 0.98809524
|
|
0.96483942 0.97619048 1. 1. ]
|
|
|
|
mean value: 0.9845791805094131
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.33333333 0.71428571 0.83333333 0.4 0.5
|
|
0.6 0.8 0.5 0.83333333]
|
|
|
|
mean value: 0.6114285714285714
|
|
|
|
key: train_jcc
|
|
value: [0.95348837 0.95238095 0.97619048 0.95348837 1. 0.97727273
|
|
0.93181818 0.95238095 1. 1. ]
|
|
|
|
mean value: 0.9697020034229337
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.193887 0.20056534 0.18003941 0.18076944 0.18147826 0.18275189
|
|
0.1809113 0.18055868 0.23209596 0.22321177]
|
|
|
|
mean value: 0.19362690448760986
|
|
|
|
key: score_time
|
|
value: [0.02155495 0.02253103 0.02014899 0.02104568 0.02134514 0.01253247
|
|
0.02108765 0.02212143 0.01170492 0.02218437]
|
|
|
|
mean value: 0.01962566375732422
|
|
|
|
key: test_mcc
|
|
value: [0.65465367 0.21821789 0.65465367 0.81649658 0.31622777 0.35
|
|
0.55 0.8 0.63245553 0.79056942]
|
|
|
|
mean value: 0.5783274525672282
|
|
|
|
key: train_mcc
|
|
value: [0.95238095 0.95346259 0.97646729 1. 1. 1.
|
|
0.92967885 0.95396693 1. 1. ]
|
|
|
|
mean value: 0.9765956615197537
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.6 0.8 0.9 0.66666667 0.66666667
|
|
0.77777778 0.88888889 0.77777778 0.88888889]
|
|
|
|
mean value: 0.7766666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.97619048 0.97619048 0.98809524 1. 1. 1.
|
|
0.96470588 0.97647059 1. 1. ]
|
|
|
|
mean value: 0.9881652661064426
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.5 0.83333333 0.90909091 0.57142857 0.66666667
|
|
0.75 0.88888889 0.75 0.90909091]
|
|
|
|
mean value: 0.7528499278499278
|
|
|
|
key: train_fscore
|
|
value: [0.97619048 0.97560976 0.98795181 1. 1. 1.
|
|
0.96470588 0.97560976 1. 1. ]
|
|
|
|
mean value: 0.9880067677967455
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 0.71428571 0.83333333 0.66666667 0.6
|
|
0.75 1. 1. 0.83333333]
|
|
|
|
mean value: 0.8064285714285714
|
|
|
|
key: train_precision
|
|
value: [0.97619048 1. 1. 1. 1. 1.
|
|
0.97619048 1. 1. 1. ]
|
|
|
|
mean value: 0.9952380952380953
|
|
|
|
key: test_recall
|
|
value: [0.6 0.4 1. 1. 0.5 0.75 0.75 0.8 0.6 1. ]
|
|
|
|
mean value: 0.74
|
|
|
|
key: train_recall
|
|
value: [0.97619048 0.95238095 0.97619048 1. 1. 1.
|
|
0.95348837 0.95238095 1. 1. ]
|
|
|
|
mean value: 0.981063122923588
|
|
|
|
key: test_roc_auc
|
|
value: [0.8 0.6 0.8 0.9 0.65 0.675 0.775 0.9 0.8 0.875]
|
|
|
|
mean value: 0.7775000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [0.97619048 0.97619048 0.98809524 1. 1. 1.
|
|
0.96483942 0.97619048 1. 1. ]
|
|
|
|
mean value: 0.9881506090808416
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.33333333 0.71428571 0.83333333 0.4 0.5
|
|
0.6 0.8 0.6 0.83333333]
|
|
|
|
mean value: 0.6214285714285714
|
|
|
|
key: train_jcc
|
|
value: [0.95348837 0.95238095 0.97619048 1. 1. 1.
|
|
0.93181818 0.95238095 1. 1. ]
|
|
|
|
mean value: 0.9766258934863585
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02603078 0.0317347 0.0293777 0.02802157 0.02537227 0.02874088
|
|
0.02727509 0.02985263 0.02739787 0.02804685]
|
|
|
|
mean value: 0.028185033798217775
|
|
|
|
key: score_time
|
|
value: [0.01158428 0.01153493 0.01156878 0.01157403 0.01153851 0.01149273
|
|
0.0114758 0.01156187 0.01148105 0.01154375]
|
|
|
|
mean value: 0.011535573005676269
|
|
|
|
key: test_mcc
|
|
value: [0.5 0.13483997 0.09449112 0.60714286 0.18898224 0.73214286
|
|
0.75592895 0.46428571 0.87287156 0.32732684]
|
|
|
|
mean value: 0.46780120981374096
|
|
|
|
key: train_mcc
|
|
value: [0.85294118 0.86849267 0.86948194 0.84173622 0.90025835 0.8251972
|
|
0.82788248 0.82614456 0.79688349 0.82614456]
|
|
|
|
mean value: 0.8435162666559768
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.5625 0.53333333 0.8 0.6 0.86666667
|
|
0.86666667 0.73333333 0.93333333 0.66666667]
|
|
|
|
mean value: 0.73125
|
|
|
|
key: train_accuracy
|
|
value: [0.92647059 0.93382353 0.93430657 0.91970803 0.94890511 0.91240876
|
|
0.91240876 0.91240876 0.89781022 0.91240876]
|
|
|
|
mean value: 0.9210659081150708
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.46153846 0.58823529 0.8 0.5 0.85714286
|
|
0.88888889 0.75 0.94117647 0.70588235]
|
|
|
|
mean value: 0.7242864325217266
|
|
|
|
key: train_fscore
|
|
value: [0.92647059 0.93233083 0.93333333 0.91729323 0.94736842 0.91176471
|
|
0.90769231 0.90909091 0.89393939 0.90909091]
|
|
|
|
mean value: 0.9188374628467507
|
|
|
|
key: test_precision
|
|
value: [0.75 0.6 0.5 0.75 0.6 0.85714286
|
|
0.8 0.75 0.88888889 0.66666667]
|
|
|
|
mean value: 0.7162698412698413
|
|
|
|
key: train_precision
|
|
value: [0.92647059 0.95384615 0.95454545 0.953125 0.984375 0.92537313
|
|
0.9516129 0.9375 0.921875 0.9375 ]
|
|
|
|
mean value: 0.9446223234181067
|
|
|
|
key: test_recall
|
|
value: [0.75 0.375 0.71428571 0.85714286 0.42857143 0.85714286
|
|
1. 0.75 1. 0.75 ]
|
|
|
|
mean value: 0.7482142857142857
|
|
|
|
key: train_recall
|
|
value: [0.92647059 0.91176471 0.91304348 0.88405797 0.91304348 0.89855072
|
|
0.86764706 0.88235294 0.86764706 0.88235294]
|
|
|
|
mean value: 0.894693094629156
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.5625 0.54464286 0.80357143 0.58928571 0.86607143
|
|
0.85714286 0.73214286 0.92857143 0.66071429]
|
|
|
|
mean value: 0.7294642857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.92647059 0.93382353 0.93446292 0.91997016 0.9491688 0.91251066
|
|
0.9120844 0.91219096 0.89759165 0.91219096]
|
|
|
|
mean value: 0.9210464620630862
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.3 0.41666667 0.66666667 0.33333333 0.75
|
|
0.8 0.6 0.88888889 0.54545455]
|
|
|
|
mean value: 0.5901010101010101
|
|
|
|
key: train_jcc
|
|
value: [0.8630137 0.87323944 0.875 0.84722222 0.9 0.83783784
|
|
0.83098592 0.83333333 0.80821918 0.83333333]
|
|
|
|
mean value: 0.8502184955551731
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.69898343 0.90136313 0.67159677 0.63970423 0.80942416 0.68580747
|
|
0.65936661 0.84460902 0.68487692 0.63214517]
|
|
|
|
mean value: 0.7227876901626586
|
|
|
|
key: score_time
|
|
value: [0.01445556 0.01467085 0.01498032 0.01481318 0.01489925 0.01512837
|
|
0.01468015 0.02938008 0.01484513 0.01502919]
|
|
|
|
mean value: 0.016288208961486816
|
|
|
|
key: test_mcc
|
|
value: [0.51639778 0.40451992 0.46428571 0.73214286 0.47245559 0.6000992
|
|
0.75592895 0.73214286 0.875 0.47245559]
|
|
|
|
mean value: 0.6025428452234197
|
|
|
|
key: train_mcc
|
|
value: [1. 0.95681396 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9956813961931199
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.6875 0.73333333 0.86666667 0.73333333 0.8
|
|
0.86666667 0.86666667 0.93333333 0.73333333]
|
|
|
|
mean value: 0.7970833333333334
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.97794118 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9977941176470588
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.61538462 0.71428571 0.85714286 0.66666667 0.76923077
|
|
0.88888889 0.875 0.93333333 0.77777778]
|
|
|
|
mean value: 0.7875488400488401
|
|
|
|
key: train_fscore
|
|
value: [1. 0.97744361 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9977443609022556
|
|
|
|
key: test_precision
|
|
value: [0.7 0.8 0.71428571 0.85714286 0.8 0.83333333
|
|
0.8 0.875 1. 0.7 ]
|
|
|
|
mean value: 0.8079761904761905
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.5 0.71428571 0.85714286 0.57142857 0.71428571
|
|
1. 0.875 0.875 0.875 ]
|
|
|
|
mean value: 0.7857142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 0.95588235 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9955882352941177
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.6875 0.73214286 0.86607143 0.72321429 0.79464286
|
|
0.85714286 0.86607143 0.9375 0.72321429]
|
|
|
|
mean value: 0.79375
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.97794118 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9977941176470588
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.44444444 0.55555556 0.75 0.5 0.625
|
|
0.8 0.77777778 0.875 0.63636364]
|
|
|
|
mean value: 0.660050505050505
|
|
|
|
key: train_jcc
|
|
value: [1. 0.95588235 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9955882352941177
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01208878 0.01187468 0.0089395 0.00884485 0.00917196 0.00861669
|
|
0.00855947 0.00867271 0.00901723 0.00855565]
|
|
|
|
mean value: 0.009434151649475097
|
|
|
|
key: score_time
|
|
value: [0.01585054 0.00904846 0.00892377 0.00881743 0.00884295 0.00861287
|
|
0.00854731 0.00849247 0.00860453 0.00866532]
|
|
|
|
mean value: 0.00944056510925293
|
|
|
|
key: test_mcc
|
|
value: [ 0.48038446 0.40451992 0.21821789 0.37796447 0.26189246 -0.32732684
|
|
0.28571429 0.20044593 0.02620712 0.20044593]
|
|
|
|
mean value: 0.21284656395936566
|
|
|
|
key: train_mcc
|
|
value: [0.4195732 0.47149797 0.58672638 0.38357586 0.4896487 0.43514606
|
|
0.43843163 0.45505007 0.45678289 0.41002068]
|
|
|
|
mean value: 0.45464534266878026
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.6875 0.6 0.66666667 0.6 0.33333333
|
|
0.6 0.6 0.53333333 0.6 ]
|
|
|
|
mean value: 0.5908333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.69117647 0.71323529 0.78832117 0.66423358 0.71532847 0.70072993
|
|
0.70072993 0.68613139 0.69343066 0.67153285]
|
|
|
|
mean value: 0.7024849720910262
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.73684211 0.625 0.70588235 0.66666667 0.375
|
|
0.72727273 0.7 0.66666667 0.7 ]
|
|
|
|
mean value: 0.6665235280715157
|
|
|
|
key: train_fscore
|
|
value: [0.74390244 0.76363636 0.80794702 0.73563218 0.77192982 0.75151515
|
|
0.74846626 0.75428571 0.75581395 0.73988439]
|
|
|
|
mean value: 0.7573013301019287
|
|
|
|
key: test_precision
|
|
value: [0.61538462 0.63636364 0.55555556 0.6 0.54545455 0.33333333
|
|
0.57142857 0.58333333 0.53846154 0.58333333]
|
|
|
|
mean value: 0.5562648462648463
|
|
|
|
key: train_precision
|
|
value: [0.63541667 0.64948454 0.74390244 0.60952381 0.64705882 0.64583333
|
|
0.64210526 0.61682243 0.625 0.60952381]
|
|
|
|
mean value: 0.6424671110748332
|
|
|
|
key: test_recall
|
|
value: [1. 0.875 0.71428571 0.85714286 0.85714286 0.42857143
|
|
1. 0.875 0.875 0.875 ]
|
|
|
|
mean value: 0.8357142857142857
|
|
|
|
key: train_recall
|
|
value: [0.89705882 0.92647059 0.88405797 0.92753623 0.95652174 0.89855072
|
|
0.89705882 0.97058824 0.95588235 0.94117647]
|
|
|
|
mean value: 0.9254901960784314
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.6875 0.60714286 0.67857143 0.61607143 0.33928571
|
|
0.57142857 0.58035714 0.50892857 0.58035714]
|
|
|
|
mean value: 0.5857142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.69117647 0.71323529 0.78761722 0.66229753 0.71355499 0.69927536
|
|
0.7021526 0.68819267 0.69533248 0.67348679]
|
|
|
|
mean value: 0.7026321398124468
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.58333333 0.45454545 0.54545455 0.5 0.23076923
|
|
0.57142857 0.53846154 0.5 0.53846154]
|
|
|
|
mean value: 0.5077838827838828
|
|
|
|
key: train_jcc
|
|
value: [0.59223301 0.61764706 0.67777778 0.58181818 0.62857143 0.60194175
|
|
0.59803922 0.60550459 0.60747664 0.58715596]
|
|
|
|
mean value: 0.6098165605931479
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00927424 0.00953245 0.00893164 0.0090518 0.0091083 0.00878716
|
|
0.00883508 0.00887918 0.00904822 0.00889754]
|
|
|
|
mean value: 0.009034562110900878
|
|
|
|
key: score_time
|
|
value: [0.00911903 0.00886154 0.00872445 0.00863576 0.00858903 0.00869751
|
|
0.0092392 0.00863194 0.00863743 0.00857139]
|
|
|
|
mean value: 0.00877072811126709
|
|
|
|
key: test_mcc
|
|
value: [ 0.13483997 0.25819889 -0.26189246 0.18898224 0.20044593 0.04029115
|
|
-0.05455447 0.46770717 -0.34247476 -0.34247476]
|
|
|
|
mean value: 0.02890688964379332
|
|
|
|
key: train_mcc
|
|
value: [0.44416091 0.46159309 0.49711106 0.47966196 0.45364413 0.34665985
|
|
0.42042506 0.43514606 0.45582775 0.33273069]
|
|
|
|
mean value: 0.43269605597383165
|
|
|
|
key: test_accuracy
|
|
value: [0.5625 0.625 0.4 0.6 0.6 0.53333333
|
|
0.46666667 0.66666667 0.33333333 0.33333333]
|
|
|
|
mean value: 0.5120833333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.71323529 0.72058824 0.73722628 0.72992701 0.71532847 0.67153285
|
|
0.70072993 0.70072993 0.71532847 0.66423358]
|
|
|
|
mean value: 0.7068860025762129
|
|
|
|
key: test_fscore
|
|
value: [0.46153846 0.57142857 0.18181818 0.5 0.4 0.36363636
|
|
0.42857143 0.54545455 0.16666667 0.16666667]
|
|
|
|
mean value: 0.37857808857808856
|
|
|
|
key: train_fscore
|
|
value: [0.66666667 0.67241379 0.69491525 0.68907563 0.66666667 0.65116279
|
|
0.64347826 0.62385321 0.65486726 0.62903226]
|
|
|
|
mean value: 0.6592131788204268
|
|
|
|
key: test_precision
|
|
value: [0.6 0.66666667 0.25 0.6 0.66666667 0.5
|
|
0.5 1. 0.25 0.25 ]
|
|
|
|
mean value: 0.5283333333333333
|
|
|
|
key: train_precision
|
|
value: [0.79591837 0.8125 0.83673469 0.82 0.8125 0.7
|
|
0.78723404 0.82926829 0.82222222 0.69642857]
|
|
|
|
mean value: 0.7912806190111401
|
|
|
|
key: test_recall
|
|
value: [0.375 0.5 0.14285714 0.42857143 0.28571429 0.28571429
|
|
0.375 0.375 0.125 0.125 ]
|
|
|
|
mean value: 0.30178571428571427
|
|
|
|
key: train_recall
|
|
value: [0.57352941 0.57352941 0.5942029 0.5942029 0.56521739 0.60869565
|
|
0.54411765 0.5 0.54411765 0.57352941]
|
|
|
|
mean value: 0.5671142369991474
|
|
|
|
key: test_roc_auc
|
|
value: [0.5625 0.625 0.38392857 0.58928571 0.58035714 0.51785714
|
|
0.47321429 0.6875 0.34821429 0.34821429]
|
|
|
|
mean value: 0.5116071428571428
|
|
|
|
key: train_roc_auc
|
|
value: [0.71323529 0.72058824 0.73827792 0.73092498 0.71643223 0.67199488
|
|
0.69959506 0.69927536 0.71408781 0.6635763 ]
|
|
|
|
mean value: 0.7067988064791133
|
|
|
|
key: test_jcc
|
|
value: [0.3 0.4 0.1 0.33333333 0.25 0.22222222
|
|
0.27272727 0.375 0.09090909 0.09090909]
|
|
|
|
mean value: 0.243510101010101
|
|
|
|
key: train_jcc
|
|
value: [0.5 0.50649351 0.53246753 0.52564103 0.5 0.48275862
|
|
0.47435897 0.45333333 0.48684211 0.45882353]
|
|
|
|
mean value: 0.492071862765895
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00864172 0.00820637 0.00819468 0.00819182 0.00855231 0.00828409
|
|
0.00927711 0.00862646 0.00836682 0.00838137]
|
|
|
|
mean value: 0.008472275733947755
|
|
|
|
key: score_time
|
|
value: [0.01419449 0.00942516 0.00938225 0.0093658 0.00943661 0.00941682
|
|
0.00962782 0.00939751 0.00932145 0.00941181]
|
|
|
|
mean value: 0.009897971153259277
|
|
|
|
key: test_mcc
|
|
value: [ 0.12598816 0.12598816 -0.18898224 -0.33928571 -0.07142857 0.05455447
|
|
-0.07142857 -0.49099025 -0.49099025 -0.19642857]
|
|
|
|
mean value: -0.1543003383239525
|
|
|
|
key: train_mcc
|
|
value: [0.45711975 0.47479269 0.44553401 0.40968143 0.41939006 0.43695116
|
|
0.4060076 0.38712429 0.51856637 0.48933032]
|
|
|
|
mean value: 0.4444497689246404
|
|
|
|
key: test_accuracy
|
|
value: [0.5625 0.5625 0.4 0.33333333 0.46666667 0.53333333
|
|
0.46666667 0.26666667 0.26666667 0.4 ]
|
|
|
|
mean value: 0.42583333333333334
|
|
|
|
key: train_accuracy
|
|
value: [0.72794118 0.72794118 0.72262774 0.70072993 0.7080292 0.71532847
|
|
0.70072993 0.69343066 0.75912409 0.74452555]
|
|
|
|
mean value: 0.7200407900386432
|
|
|
|
key: test_fscore
|
|
value: [0.58823529 0.53333333 0.47058824 0.28571429 0.42857143 0.46153846
|
|
0.5 0.35294118 0.35294118 0.4 ]
|
|
|
|
mean value: 0.437386339151045
|
|
|
|
key: train_fscore
|
|
value: [0.71755725 0.68376068 0.72058824 0.672 0.69230769 0.69291339
|
|
0.672 0.69565217 0.7518797 0.73684211]
|
|
|
|
mean value: 0.7035501227521984
|
|
|
|
key: test_precision
|
|
value: [0.55555556 0.57142857 0.4 0.28571429 0.42857143 0.5
|
|
0.5 0.33333333 0.33333333 0.42857143]
|
|
|
|
mean value: 0.4336507936507936
|
|
|
|
key: train_precision
|
|
value: [0.74603175 0.81632653 0.73134328 0.75 0.73770492 0.75862069
|
|
0.73684211 0.68571429 0.76923077 0.75384615]
|
|
|
|
mean value: 0.7485660481968407
|
|
|
|
key: test_recall
|
|
value: [0.625 0.5 0.57142857 0.28571429 0.42857143 0.42857143
|
|
0.5 0.375 0.375 0.375 ]
|
|
|
|
mean value: 0.4464285714285714
|
|
|
|
key: train_recall
|
|
value: [0.69117647 0.58823529 0.71014493 0.60869565 0.65217391 0.63768116
|
|
0.61764706 0.70588235 0.73529412 0.72058824]
|
|
|
|
mean value: 0.6667519181585678
|
|
|
|
key: test_roc_auc
|
|
value: [0.5625 0.5625 0.41071429 0.33035714 0.46428571 0.52678571
|
|
0.46428571 0.25892857 0.25892857 0.40178571]
|
|
|
|
mean value: 0.42410714285714285
|
|
|
|
key: train_roc_auc
|
|
value: [0.72794118 0.72794118 0.72271952 0.70140665 0.7084399 0.7158994
|
|
0.70012788 0.69352089 0.75895141 0.74435209]
|
|
|
|
mean value: 0.7201300085251492
|
|
|
|
key: test_jcc
|
|
value: [0.41666667 0.36363636 0.30769231 0.16666667 0.27272727 0.3
|
|
0.33333333 0.21428571 0.21428571 0.25 ]
|
|
|
|
mean value: 0.2839294039294039
|
|
|
|
key: train_jcc
|
|
value: [0.55952381 0.51948052 0.56321839 0.5060241 0.52941176 0.53012048
|
|
0.5060241 0.53333333 0.60240964 0.58333333]
|
|
|
|
mean value: 0.5432879464434488
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.53
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0099647 0.01075697 0.01004601 0.01000023 0.0098443 0.01120949
|
|
0.01087713 0.01050925 0.01026344 0.01005459]
|
|
|
|
mean value: 0.010352611541748047
|
|
|
|
key: score_time
|
|
value: [0.00881791 0.00963497 0.00895095 0.00901651 0.00881243 0.00980449
|
|
0.00981879 0.00906205 0.00892186 0.00888133]
|
|
|
|
mean value: 0.00917212963104248
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.37796447 0.21821789 0.33928571 0.07142857 0.32732684
|
|
0.34247476 0.33928571 0.47245559 0.32732684]
|
|
|
|
mean value: 0.34457071732742295
|
|
|
|
key: train_mcc
|
|
value: [0.70618786 0.73817324 0.75261265 0.72469196 0.64961637 0.678815
|
|
0.67983923 0.72271952 0.72271952 0.70801364]
|
|
|
|
mean value: 0.7083389000244062
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.6875 0.6 0.66666667 0.53333333 0.66666667
|
|
0.66666667 0.66666667 0.73333333 0.66666667]
|
|
|
|
mean value: 0.6699999999999999
|
|
|
|
key: train_accuracy
|
|
value: [0.85294118 0.86764706 0.87591241 0.86131387 0.82481752 0.83941606
|
|
0.83941606 0.86131387 0.86131387 0.8540146 ]
|
|
|
|
mean value: 0.85381064834693
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.66666667 0.625 0.66666667 0.53333333 0.61538462
|
|
0.73684211 0.66666667 0.77777778 0.70588235]
|
|
|
|
mean value: 0.679422018470006
|
|
|
|
key: train_fscore
|
|
value: [0.85074627 0.86153846 0.87407407 0.85714286 0.82608696 0.84057971
|
|
0.83333333 0.86131387 0.86131387 0.85294118]
|
|
|
|
mean value: 0.8519070575108975
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.71428571 0.55555556 0.625 0.5 0.66666667
|
|
0.63636364 0.71428571 0.7 0.66666667]
|
|
|
|
mean value: 0.6635966810966811
|
|
|
|
key: train_precision
|
|
value: [0.86363636 0.90322581 0.89393939 0.890625 0.82608696 0.84057971
|
|
0.859375 0.85507246 0.85507246 0.85294118]
|
|
|
|
mean value: 0.8640554334700857
|
|
|
|
key: test_recall
|
|
value: [0.75 0.625 0.71428571 0.71428571 0.57142857 0.57142857
|
|
0.875 0.625 0.875 0.75 ]
|
|
|
|
mean value: 0.7071428571428572
|
|
|
|
key: train_recall
|
|
value: [0.83823529 0.82352941 0.85507246 0.82608696 0.82608696 0.84057971
|
|
0.80882353 0.86764706 0.86764706 0.85294118]
|
|
|
|
mean value: 0.8406649616368287
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.6875 0.60714286 0.66964286 0.53571429 0.66071429
|
|
0.65178571 0.66964286 0.72321429 0.66071429]
|
|
|
|
mean value: 0.6678571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [0.85294118 0.86764706 0.87606564 0.86157289 0.82480818 0.8394075
|
|
0.83919437 0.86135976 0.86135976 0.85400682]
|
|
|
|
mean value: 0.8538363171355499
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.5 0.45454545 0.5 0.36363636 0.44444444
|
|
0.58333333 0.5 0.63636364 0.54545455]
|
|
|
|
mean value: 0.5194444444444445
|
|
|
|
key: train_jcc
|
|
value: [0.74025974 0.75675676 0.77631579 0.75 0.7037037 0.725
|
|
0.71428571 0.75641026 0.75641026 0.74358974]
|
|
|
|
mean value: 0.7422731960889856
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.81305027 0.54037261 0.55822015 0.56360364 0.71972537 0.54234982
|
|
0.55895686 0.55106568 0.69487 0.65232706]
|
|
|
|
mean value: 0.6194541454315186
|
|
|
|
key: score_time
|
|
value: [0.01205111 0.01191854 0.01198554 0.01193452 0.01194143 0.01201057
|
|
0.0119381 0.01200271 0.01193905 0.01203251]
|
|
|
|
mean value: 0.011975407600402832
|
|
|
|
key: test_mcc
|
|
value: [0.37796447 0.25819889 0.46428571 0.60714286 0.73214286 0.34247476
|
|
0.6000992 0.60714286 0.47245559 0.07142857]
|
|
|
|
mean value: 0.4533335769020545
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.625 0.73333333 0.8 0.86666667 0.66666667
|
|
0.8 0.8 0.73333333 0.53333333]
|
|
|
|
mean value: 0.7245833333333334
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.57142857 0.71428571 0.8 0.85714286 0.54545455
|
|
0.82352941 0.8 0.77777778 0.53333333]
|
|
|
|
mean value: 0.7089618877854171
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.66666667 0.71428571 0.75 0.85714286 0.75
|
|
0.77777778 0.85714286 0.7 0.57142857]
|
|
|
|
mean value: 0.7358730158730159
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.625 0.5 0.71428571 0.85714286 0.85714286 0.42857143
|
|
0.875 0.75 0.875 0.5 ]
|
|
|
|
mean value: 0.6982142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.625 0.73214286 0.80357143 0.86607143 0.65178571
|
|
0.79464286 0.80357143 0.72321429 0.53571429]
|
|
|
|
mean value: 0.7223214285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.4 0.55555556 0.66666667 0.75 0.375
|
|
0.7 0.66666667 0.63636364 0.36363636]
|
|
|
|
mean value: 0.5613888888888889
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01522923 0.0148294 0.01172709 0.01241922 0.01142979 0.01145053
|
|
0.01192904 0.01232505 0.01241684 0.01256514]
|
|
|
|
mean value: 0.012632131576538086
|
|
|
|
key: score_time
|
|
value: [0.01144719 0.00903249 0.00902367 0.00939679 0.0089097 0.00912046
|
|
0.00945306 0.0093224 0.0088613 0.00930214]
|
|
|
|
mean value: 0.009386920928955078
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 1. 1. 0.87287156 0.75592895 0.87287156
|
|
0.87287156 1. 0.875 0.47245559]
|
|
|
|
mean value: 0.8351940008460609
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 1. 1. 0.93333333 0.86666667 0.93333333
|
|
0.93333333 1. 0.93333333 0.73333333]
|
|
|
|
mean value: 0.9145833333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 1. 1. 0.92307692 0.83333333 0.92307692
|
|
0.94117647 1. 0.93333333 0.77777778]
|
|
|
|
mean value: 0.9155304172951232
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.77777778 1. 1. 1. 1. 1.
|
|
0.88888889 1. 1. 0.7 ]
|
|
|
|
mean value: 0.9366666666666666
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 1. 1. 0.85714286 0.71428571 0.85714286
|
|
1. 1. 0.875 0.875 ]
|
|
|
|
mean value: 0.9053571428571429
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 1. 1. 0.92857143 0.85714286 0.92857143
|
|
0.92857143 1. 0.9375 0.72321429]
|
|
|
|
mean value: 0.9116071428571428
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.7 1. 1. 0.85714286 0.71428571 0.85714286
|
|
0.88888889 1. 0.875 0.63636364]
|
|
|
|
mean value: 0.8528823953823954
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.79
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09628201 0.0939455 0.0950532 0.09577513 0.09506917 0.09508944
|
|
0.09705305 0.0958035 0.09578133 0.08818817]
|
|
|
|
mean value: 0.09480404853820801
|
|
|
|
key: score_time
|
|
value: [0.01847291 0.01853156 0.01697826 0.01843977 0.01702142 0.01853919
|
|
0.01865029 0.01879311 0.01888704 0.01701975]
|
|
|
|
mean value: 0.018133330345153808
|
|
|
|
key: test_mcc
|
|
value: [0.25819889 0.75 0.46428571 0.21821789 0.32732684 0.6000992
|
|
0.75592895 0.60714286 0.64465837 0.32732684]
|
|
|
|
mean value: 0.49531855375074396
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.875 0.73333333 0.6 0.66666667 0.8
|
|
0.86666667 0.8 0.8 0.66666667]
|
|
|
|
mean value: 0.7433333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.875 0.71428571 0.625 0.61538462 0.76923077
|
|
0.88888889 0.8 0.84210526 0.70588235]
|
|
|
|
mean value: 0.7502444270555726
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.6 0.875 0.71428571 0.55555556 0.66666667 0.83333333
|
|
0.8 0.85714286 0.72727273 0.66666667]
|
|
|
|
mean value: 0.7295923520923521
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.875 0.71428571 0.71428571 0.57142857 0.71428571
|
|
1. 0.75 1. 0.75 ]
|
|
|
|
mean value: 0.7839285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.875 0.73214286 0.60714286 0.66071429 0.79464286
|
|
0.85714286 0.80357143 0.78571429 0.66071429]
|
|
|
|
mean value: 0.7401785714285715
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.77777778 0.55555556 0.45454545 0.44444444 0.625
|
|
0.8 0.66666667 0.72727273 0.54545455]
|
|
|
|
mean value: 0.6096717171717172
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00880289 0.00877786 0.00870657 0.00861526 0.0087831 0.00858951
|
|
0.00862694 0.00863767 0.00888395 0.00863767]
|
|
|
|
mean value: 0.008706140518188476
|
|
|
|
key: score_time
|
|
value: [0.00845671 0.00843096 0.00858974 0.00850964 0.00851679 0.00854015
|
|
0.00843453 0.00845456 0.00848818 0.00856996]
|
|
|
|
mean value: 0.00849912166595459
|
|
|
|
key: test_mcc
|
|
value: [0.37796447 0.37796447 0.33928571 0.73214286 0.18898224 0.73214286
|
|
0.49099025 0.49099025 0.07142857 0.34247476]
|
|
|
|
mean value: 0.41443664482958203
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.6875 0.66666667 0.86666667 0.6 0.86666667
|
|
0.73333333 0.73333333 0.53333333 0.66666667]
|
|
|
|
mean value: 0.7041666666666666
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.66666667 0.66666667 0.85714286 0.5 0.85714286
|
|
0.71428571 0.71428571 0.53333333 0.73684211]
|
|
|
|
mean value: 0.6952248267728144
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.71428571 0.625 0.85714286 0.6 0.85714286
|
|
0.83333333 0.83333333 0.57142857 0.63636364]
|
|
|
|
mean value: 0.719469696969697
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.625 0.71428571 0.85714286 0.42857143 0.85714286
|
|
0.625 0.625 0.5 0.875 ]
|
|
|
|
mean value: 0.6857142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.6875 0.66964286 0.86607143 0.58928571 0.86607143
|
|
0.74107143 0.74107143 0.53571429 0.65178571]
|
|
|
|
mean value: 0.7035714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.5 0.5 0.75 0.33333333 0.75
|
|
0.55555556 0.55555556 0.36363636 0.58333333]
|
|
|
|
mean value: 0.5436868686868687
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.15248561 1.14350486 1.1780479 1.1918366 1.2288053 1.24421763
|
|
1.21798635 1.18645072 1.2160306 1.16259885]
|
|
|
|
mean value: 1.1921964406967163
|
|
|
|
key: score_time
|
|
value: [0.09465766 0.09445739 0.09588194 0.09657979 0.09455395 0.09597516
|
|
0.0961926 0.09639311 0.09248829 0.09559798]
|
|
|
|
mean value: 0.09527778625488281
|
|
|
|
key: test_mcc
|
|
value: [0.37796447 0.62994079 0.60714286 0.64465837 0.64465837 0.6000992
|
|
0.87287156 0.76376262 0.19642857 0.64465837]
|
|
|
|
mean value: 0.5982185178509202
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.8125 0.8 0.8 0.8 0.8
|
|
0.93333333 0.86666667 0.6 0.8 ]
|
|
|
|
mean value: 0.79
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.8 0.8 0.72727273 0.72727273 0.76923077
|
|
0.94117647 0.85714286 0.625 0.84210526]
|
|
|
|
mean value: 0.7795083167606387
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.85714286 0.75 1. 1. 0.83333333
|
|
0.88888889 1. 0.625 0.72727273]
|
|
|
|
mean value: 0.8348304473304473
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.85714286 0.57142857 0.57142857 0.71428571
|
|
1. 0.75 0.625 1. ]
|
|
|
|
mean value: 0.7589285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.8125 0.80357143 0.78571429 0.78571429 0.79464286
|
|
0.92857143 0.875 0.59821429 0.78571429]
|
|
|
|
mean value: 0.7857142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.54545455 0.66666667 0.66666667 0.57142857 0.57142857 0.625
|
|
0.88888889 0.75 0.45454545 0.72727273]
|
|
|
|
mean value: 0.6467352092352092
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.80522847 0.83838367 0.85092449 0.92860365 0.82486296 0.88932276
|
|
0.95478344 0.91508269 0.85725498 0.85502672]
|
|
|
|
mean value: 0.8719473838806152
|
|
|
|
key: score_time
|
|
value: [0.24523139 0.22798228 0.21853042 0.21385813 0.13007426 0.19053721
|
|
0.1940155 0.19319248 0.20485663 0.21021557]
|
|
|
|
mean value: 0.2028493881225586
|
|
|
|
key: test_mcc
|
|
value: [0.37796447 0.5 0.60714286 0.6000992 0.47245559 0.6000992
|
|
0.75592895 0.76376262 0.49099025 0.47245559]
|
|
|
|
mean value: 0.5640898723848521
|
|
|
|
key: train_mcc
|
|
value: [0.92657079 0.94158382 0.95630861 0.91240409 0.92710997 0.95629932
|
|
0.92787101 0.92787101 0.91277477 0.94160273]
|
|
|
|
mean value: 0.9330396116911235
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.75 0.8 0.8 0.73333333 0.8
|
|
0.86666667 0.86666667 0.73333333 0.73333333]
|
|
|
|
mean value: 0.7770833333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.96323529 0.97058824 0.97810219 0.95620438 0.96350365 0.97810219
|
|
0.96350365 0.96350365 0.95620438 0.97080292]
|
|
|
|
mean value: 0.9663750536711035
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.75 0.8 0.76923077 0.66666667 0.76923077
|
|
0.88888889 0.85714286 0.71428571 0.77777778]
|
|
|
|
mean value: 0.7699105796164619
|
|
|
|
key: train_fscore
|
|
value: [0.96296296 0.97101449 0.97810219 0.95652174 0.96350365 0.97841727
|
|
0.96240602 0.96240602 0.95522388 0.97058824]
|
|
|
|
mean value: 0.9661146446416451
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.75 0.75 0.83333333 0.8 0.83333333
|
|
0.8 1. 0.83333333 0.7 ]
|
|
|
|
mean value: 0.7966666666666666
|
|
|
|
key: train_precision
|
|
value: [0.97014925 0.95714286 0.98529412 0.95652174 0.97058824 0.97142857
|
|
0.98461538 0.98461538 0.96969697 0.97058824]
|
|
|
|
mean value: 0.972064074859624
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.85714286 0.71428571 0.57142857 0.71428571
|
|
1. 0.75 0.625 0.875 ]
|
|
|
|
mean value: 0.7607142857142857
|
|
|
|
key: train_recall
|
|
value: [0.95588235 0.98529412 0.97101449 0.95652174 0.95652174 0.98550725
|
|
0.94117647 0.94117647 0.94117647 0.97058824]
|
|
|
|
mean value: 0.9604859335038364
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.75 0.80357143 0.79464286 0.72321429 0.79464286
|
|
0.85714286 0.875 0.74107143 0.72321429]
|
|
|
|
mean value: 0.775
|
|
|
|
key: train_roc_auc
|
|
value: [0.96323529 0.97058824 0.97815431 0.95620205 0.96355499 0.97804774
|
|
0.96334186 0.96334186 0.95609548 0.97080136]
|
|
|
|
mean value: 0.9663363171355499
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.6 0.66666667 0.625 0.5 0.625
|
|
0.8 0.75 0.55555556 0.63636364]
|
|
|
|
mean value: 0.6304040404040404
|
|
|
|
key: train_jcc
|
|
value: [0.92857143 0.94366197 0.95714286 0.91666667 0.92957746 0.95774648
|
|
0.92753623 0.92753623 0.91428571 0.94285714]
|
|
|
|
mean value: 0.9345582188784883
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02459335 0.00903463 0.00927949 0.00962591 0.00955915 0.01024914
|
|
0.01027846 0.00898671 0.0089488 0.0094049 ]
|
|
|
|
mean value: 0.010996055603027344
|
|
|
|
key: score_time
|
|
value: [0.00973821 0.00903988 0.00956082 0.00912857 0.0086503 0.00935268
|
|
0.00872087 0.00926471 0.00917768 0.00898433]
|
|
|
|
mean value: 0.009161806106567383
|
|
|
|
key: test_mcc
|
|
value: [ 0.13483997 0.25819889 -0.26189246 0.18898224 0.20044593 0.04029115
|
|
-0.05455447 0.46770717 -0.34247476 -0.34247476]
|
|
|
|
mean value: 0.02890688964379332
|
|
|
|
key: train_mcc
|
|
value: [0.44416091 0.46159309 0.49711106 0.47966196 0.45364413 0.34665985
|
|
0.42042506 0.43514606 0.45582775 0.33273069]
|
|
|
|
mean value: 0.43269605597383165
|
|
|
|
key: test_accuracy
|
|
value: [0.5625 0.625 0.4 0.6 0.6 0.53333333
|
|
0.46666667 0.66666667 0.33333333 0.33333333]
|
|
|
|
mean value: 0.5120833333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.71323529 0.72058824 0.73722628 0.72992701 0.71532847 0.67153285
|
|
0.70072993 0.70072993 0.71532847 0.66423358]
|
|
|
|
mean value: 0.7068860025762129
|
|
|
|
key: test_fscore
|
|
value: [0.46153846 0.57142857 0.18181818 0.5 0.4 0.36363636
|
|
0.42857143 0.54545455 0.16666667 0.16666667]
|
|
|
|
mean value: 0.37857808857808856
|
|
|
|
key: train_fscore
|
|
value: [0.66666667 0.67241379 0.69491525 0.68907563 0.66666667 0.65116279
|
|
0.64347826 0.62385321 0.65486726 0.62903226]
|
|
|
|
mean value: 0.6592131788204268
|
|
|
|
key: test_precision
|
|
value: [0.6 0.66666667 0.25 0.6 0.66666667 0.5
|
|
0.5 1. 0.25 0.25 ]
|
|
|
|
mean value: 0.5283333333333333
|
|
|
|
key: train_precision
|
|
value: [0.79591837 0.8125 0.83673469 0.82 0.8125 0.7
|
|
0.78723404 0.82926829 0.82222222 0.69642857]
|
|
|
|
mean value: 0.7912806190111401
|
|
|
|
key: test_recall
|
|
value: [0.375 0.5 0.14285714 0.42857143 0.28571429 0.28571429
|
|
0.375 0.375 0.125 0.125 ]
|
|
|
|
mean value: 0.30178571428571427
|
|
|
|
key: train_recall
|
|
value: [0.57352941 0.57352941 0.5942029 0.5942029 0.56521739 0.60869565
|
|
0.54411765 0.5 0.54411765 0.57352941]
|
|
|
|
mean value: 0.5671142369991474
|
|
|
|
key: test_roc_auc
|
|
value: [0.5625 0.625 0.38392857 0.58928571 0.58035714 0.51785714
|
|
0.47321429 0.6875 0.34821429 0.34821429]
|
|
|
|
mean value: 0.5116071428571428
|
|
|
|
key: train_roc_auc
|
|
value: [0.71323529 0.72058824 0.73827792 0.73092498 0.71643223 0.67199488
|
|
0.69959506 0.69927536 0.71408781 0.6635763 ]
|
|
|
|
mean value: 0.7067988064791133
|
|
|
|
key: test_jcc
|
|
value: [0.3 0.4 0.1 0.33333333 0.25 0.22222222
|
|
0.27272727 0.375 0.09090909 0.09090909]
|
|
|
|
mean value: 0.243510101010101
|
|
|
|
key: train_jcc
|
|
value: [0.5 0.50649351 0.53246753 0.52564103 0.5 0.48275862
|
|
0.47435897 0.45333333 0.48684211 0.45882353]
|
|
|
|
mean value: 0.492071862765895
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.05816865 0.0500412 0.23319006 0.17314219 0.04267597 0.04388905
|
|
0.05018234 0.04884863 0.05043435 0.04911733]
|
|
|
|
mean value: 0.07996897697448731
|
|
|
|
key: score_time
|
|
value: [0.01128793 0.01123166 0.01321077 0.01083136 0.01044869 0.01024246
|
|
0.01018572 0.01038551 0.01036978 0.01029468]
|
|
|
|
mean value: 0.01084885597229004
|
|
|
|
key: test_mcc
|
|
value: [0.5 1. 0.73214286 0.75592895 0.75592895 0.73214286
|
|
0.87287156 1. 1. 0.64465837]
|
|
|
|
mean value: 0.7993673538486897
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 1. 0.86666667 0.86666667 0.86666667 0.86666667
|
|
0.93333333 1. 1. 0.8 ]
|
|
|
|
mean value: 0.895
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 1. 0.85714286 0.83333333 0.83333333 0.85714286
|
|
0.94117647 1. 1. 0.84210526]
|
|
|
|
mean value: 0.891423411469851
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 1. 0.85714286 1. 1. 0.85714286
|
|
0.88888889 1. 1. 0.72727273]
|
|
|
|
mean value: 0.908044733044733
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 0.85714286 0.71428571 0.71428571 0.85714286
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.8892857142857142
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 1. 0.86607143 0.85714286 0.85714286 0.86607143
|
|
0.92857143 1. 1. 0.78571429]
|
|
|
|
mean value: 0.8910714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6 1. 0.75 0.71428571 0.71428571 0.75
|
|
0.88888889 1. 1. 0.72727273]
|
|
|
|
mean value: 0.8144733044733045
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.83
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03010917 0.04621315 0.04542589 0.04824805 0.05247545 0.05405784
|
|
0.04591966 0.04507923 0.04471922 0.05202174]
|
|
|
|
mean value: 0.046426939964294436
|
|
|
|
key: score_time
|
|
value: [0.02060533 0.01955223 0.02189541 0.01707244 0.01545429 0.01788497
|
|
0.02010942 0.01845121 0.02185941 0.02386618]
|
|
|
|
mean value: 0.019675087928771973
|
|
|
|
key: test_mcc
|
|
value: [0.25819889 0.37796447 0.6000992 0.73214286 0.75592895 0.64465837
|
|
0.26189246 0.6000992 0.60714286 0.04029115]
|
|
|
|
mean value: 0.4878418402088337
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.6875 0.8 0.86666667 0.86666667 0.8
|
|
0.6 0.8 0.8 0.53333333]
|
|
|
|
mean value: 0.7379166666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.70588235 0.76923077 0.85714286 0.83333333 0.72727273
|
|
0.5 0.82352941 0.8 0.63157895]
|
|
|
|
mean value: 0.7314637065720657
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.6 0.66666667 0.83333333 0.85714286 1. 1.
|
|
0.75 0.77777778 0.85714286 0.54545455]
|
|
|
|
mean value: 0.7887518037518038
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.71428571 0.85714286 0.71428571 0.57142857
|
|
0.375 0.875 0.75 0.75 ]
|
|
|
|
mean value: 0.7107142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.6875 0.79464286 0.86607143 0.85714286 0.78571429
|
|
0.61607143 0.79464286 0.80357143 0.51785714]
|
|
|
|
mean value: 0.7348214285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.54545455 0.625 0.75 0.71428571 0.57142857
|
|
0.33333333 0.7 0.66666667 0.46153846]
|
|
|
|
mean value: 0.5867707292707293
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02069235 0.00884247 0.00884652 0.0086565 0.00922513 0.00900769
|
|
0.00880408 0.0087142 0.00853491 0.00863814]
|
|
|
|
mean value: 0.00999619960784912
|
|
|
|
key: score_time
|
|
value: [0.01007533 0.0088315 0.00878239 0.00842071 0.00923371 0.00909209
|
|
0.0093112 0.00847864 0.00837755 0.00923109]
|
|
|
|
mean value: 0.008983421325683593
|
|
|
|
key: test_mcc
|
|
value: [ 0.51639778 0.5 0.09449112 0.21821789 0.49099025 0.07142857
|
|
-0.13363062 0.33928571 0.21821789 0.19642857]
|
|
|
|
mean value: 0.25118271674362413
|
|
|
|
key: train_mcc
|
|
value: [0.3884493 0.50195781 0.5360985 0.44782266 0.46524407 0.44946013
|
|
0.41236007 0.52130692 0.53282182 0.43695116]
|
|
|
|
mean value: 0.4692472430944422
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.75 0.53333333 0.6 0.73333333 0.53333333
|
|
0.46666667 0.66666667 0.6 0.6 ]
|
|
|
|
mean value: 0.6233333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.69117647 0.75 0.76642336 0.72262774 0.72992701 0.72262774
|
|
0.70072993 0.75912409 0.76642336 0.71532847]
|
|
|
|
mean value: 0.7324388149420352
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.75 0.58823529 0.625 0.75 0.53333333
|
|
0.6 0.66666667 0.57142857 0.625 ]
|
|
|
|
mean value: 0.6487441643323997
|
|
|
|
key: train_fscore
|
|
value: [0.71621622 0.76056338 0.78082192 0.73972603 0.75167785 0.74324324
|
|
0.72847682 0.76923077 0.76470588 0.73469388]
|
|
|
|
mean value: 0.7489355987622406
|
|
|
|
key: test_precision
|
|
value: [0.7 0.75 0.5 0.55555556 0.66666667 0.5
|
|
0.5 0.71428571 0.66666667 0.625 ]
|
|
|
|
mean value: 0.6178174603174603
|
|
|
|
key: train_precision
|
|
value: [0.6625 0.72972973 0.74025974 0.7012987 0.7 0.69620253
|
|
0.6626506 0.73333333 0.76470588 0.6835443 ]
|
|
|
|
mean value: 0.7074224824827122
|
|
|
|
key: test_recall
|
|
value: [0.875 0.75 0.71428571 0.71428571 0.85714286 0.57142857
|
|
0.75 0.625 0.5 0.625 ]
|
|
|
|
mean value: 0.6982142857142857
|
|
|
|
key: train_recall
|
|
value: [0.77941176 0.79411765 0.82608696 0.7826087 0.8115942 0.79710145
|
|
0.80882353 0.80882353 0.76470588 0.79411765]
|
|
|
|
mean value: 0.7967391304347826
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.75 0.54464286 0.60714286 0.74107143 0.53571429
|
|
0.44642857 0.66964286 0.60714286 0.59821429]
|
|
|
|
mean value: 0.625
|
|
|
|
key: train_roc_auc
|
|
value: [0.69117647 0.75 0.76598465 0.7221867 0.72932651 0.72208014
|
|
0.70151321 0.75948423 0.76641091 0.7158994 ]
|
|
|
|
mean value: 0.7324062233589088
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.6 0.41666667 0.45454545 0.6 0.36363636
|
|
0.42857143 0.5 0.4 0.45454545]
|
|
|
|
mean value: 0.48543290043290044
|
|
|
|
key: train_jcc
|
|
value: [0.55789474 0.61363636 0.64044944 0.58695652 0.60215054 0.59139785
|
|
0.57291667 0.625 0.61904762 0.58064516]
|
|
|
|
mean value: 0.599009489452123
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0116632 0.01467299 0.01408863 0.01358342 0.01404715 0.0151732
|
|
0.01406264 0.01415348 0.01610732 0.01608157]
|
|
|
|
mean value: 0.014363360404968262
|
|
|
|
key: score_time
|
|
value: [0.00852823 0.01135421 0.01129103 0.01132727 0.01134753 0.01131463
|
|
0.01134586 0.01129699 0.01133966 0.01134062]
|
|
|
|
mean value: 0.011048603057861327
|
|
|
|
key: test_mcc
|
|
value: [0.40451992 0.51639778 0.47245559 0.64465837 0.20044593 0.6000992
|
|
0.87287156 0.56407607 0.76376262 0.07142857]
|
|
|
|
mean value: 0.5110715612053683
|
|
|
|
key: train_mcc
|
|
value: [0.94280904 0.94280904 0.60385237 0.71739374 0.8130258 0.90259957
|
|
0.83757093 0.68130314 0.75166927 0.95710706]
|
|
|
|
mean value: 0.8150139960163064
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.75 0.73333333 0.8 0.6 0.8
|
|
0.93333333 0.73333333 0.86666667 0.53333333]
|
|
|
|
mean value: 0.74375
|
|
|
|
key: train_accuracy
|
|
value: [0.97058824 0.97058824 0.76642336 0.83941606 0.89781022 0.94890511
|
|
0.91240876 0.81751825 0.86131387 0.97810219]
|
|
|
|
mean value: 0.8963074280807213
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.71428571 0.66666667 0.72727273 0.4 0.76923077
|
|
0.94117647 0.66666667 0.85714286 0.53333333]
|
|
|
|
mean value: 0.7012617310450128
|
|
|
|
key: train_fscore
|
|
value: [0.97142857 0.96969697 0.69811321 0.81034483 0.88709677 0.94656489
|
|
0.90322581 0.77477477 0.83760684 0.97744361]
|
|
|
|
mean value: 0.8776296263804431
|
|
|
|
key: test_precision
|
|
value: [0.63636364 0.83333333 0.8 1. 0.66666667 0.83333333
|
|
0.88888889 1. 1. 0.57142857]
|
|
|
|
mean value: 0.823001443001443
|
|
|
|
key: train_precision
|
|
value: [0.94444444 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9944444444444445
|
|
|
|
key: test_recall
|
|
value: [0.875 0.625 0.57142857 0.57142857 0.28571429 0.71428571
|
|
1. 0.5 0.75 0.5 ]
|
|
|
|
mean value: 0.6392857142857142
|
|
|
|
key: train_recall
|
|
value: [1. 0.94117647 0.53623188 0.68115942 0.79710145 0.89855072
|
|
0.82352941 0.63235294 0.72058824 0.95588235]
|
|
|
|
mean value: 0.7986572890025575
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.75 0.72321429 0.78571429 0.58035714 0.79464286
|
|
0.92857143 0.75 0.875 0.53571429]
|
|
|
|
mean value: 0.7410714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [0.97058824 0.97058824 0.76811594 0.84057971 0.89855072 0.94927536
|
|
0.91176471 0.81617647 0.86029412 0.97794118]
|
|
|
|
mean value: 0.8963874680306906
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.55555556 0.5 0.57142857 0.25 0.625
|
|
0.88888889 0.5 0.75 0.36363636]
|
|
|
|
mean value: 0.5587842712842713
|
|
|
|
key: train_jcc
|
|
value: [0.94444444 0.94117647 0.53623188 0.68115942 0.79710145 0.89855072
|
|
0.82352941 0.63235294 0.72058824 0.95588235]
|
|
|
|
mean value: 0.793101733447002
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01423287 0.01370096 0.01313996 0.0129447 0.01281047 0.01359177
|
|
0.01343393 0.01367784 0.01408195 0.0132103 ]
|
|
|
|
mean value: 0.013482475280761718
|
|
|
|
key: score_time
|
|
value: [0.0117898 0.01133347 0.01135039 0.01142049 0.01129508 0.01131368
|
|
0.01131988 0.01134515 0.01133418 0.01129484]
|
|
|
|
mean value: 0.011379694938659668
|
|
|
|
key: test_mcc
|
|
value: [0.77459667 0.51639778 0.41931393 0.36689969 0.46770717 0.47245559
|
|
0.60714286 0.25 0.64465837 0.41931393]
|
|
|
|
mean value: 0.49384860039374495
|
|
|
|
key: train_mcc
|
|
value: [0.76249285 1. 0.4690195 0.19605058 0.52198402 0.91597649
|
|
0.78788403 0.57874991 0.82543222 0.63749097]
|
|
|
|
mean value: 0.6695080572599905
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.75 0.66666667 0.6 0.66666667 0.73333333
|
|
0.8 0.53333333 0.8 0.66666667]
|
|
|
|
mean value: 0.7091666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.86764706 1. 0.67883212 0.54014599 0.71532847 0.95620438
|
|
0.88321168 0.75182482 0.90510949 0.78832117]
|
|
|
|
mean value: 0.808662516101331
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.71428571 0.44444444 0.7 0.73684211 0.66666667
|
|
0.8 0.22222222 0.84210526 0.76190476]
|
|
|
|
mean value: 0.6745614035087719
|
|
|
|
key: train_fscore
|
|
value: [0.84745763 1. 0.53191489 0.68656716 0.77966102 0.95454545
|
|
0.86666667 0.66666667 0.91275168 0.82424242]
|
|
|
|
mean value: 0.8070473591837484
|
|
|
|
key: test_precision
|
|
value: [1. 0.83333333 1. 0.53846154 0.58333333 0.8
|
|
0.85714286 1. 0.72727273 0.61538462]
|
|
|
|
mean value: 0.7954928404928405
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 0.52272727 0.63888889 1.
|
|
1. 1. 0.83950617 0.70103093]
|
|
|
|
mean value: 0.8702153262290719
|
|
|
|
key: test_recall
|
|
value: [0.75 0.625 0.28571429 1. 1. 0.57142857
|
|
0.75 0.125 1. 1. ]
|
|
|
|
mean value: 0.7107142857142857
|
|
|
|
key: train_recall
|
|
value: [0.73529412 1. 0.36231884 1. 1. 0.91304348
|
|
0.76470588 0.5 1. 1. ]
|
|
|
|
mean value: 0.827536231884058
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.75 0.64285714 0.625 0.6875 0.72321429
|
|
0.80357143 0.5625 0.78571429 0.64285714]
|
|
|
|
mean value: 0.7098214285714286
|
|
|
|
key: train_roc_auc
|
|
value: [0.86764706 1. 0.68115942 0.53676471 0.71323529 0.95652174
|
|
0.88235294 0.75 0.9057971 0.78985507]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.55555556 0.28571429 0.53846154 0.58333333 0.5
|
|
0.66666667 0.125 0.72727273 0.61538462]
|
|
|
|
mean value: 0.5347388722388723
|
|
|
|
key: train_jcc
|
|
value: [0.73529412 1. 0.36231884 0.52272727 0.63888889 0.91304348
|
|
0.76470588 0.5 0.83950617 0.70103093]
|
|
|
|
mean value: 0.6977515581131298
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.11000919 0.0930891 0.09374738 0.0947845 0.0942843 0.09523058
|
|
0.0954597 0.09537649 0.09564161 0.09390068]
|
|
|
|
mean value: 0.09615235328674317
|
|
|
|
key: score_time
|
|
value: [0.01494956 0.01465988 0.01507306 0.01463342 0.01451349 0.014714
|
|
0.01556587 0.0148592 0.01502132 0.01471615]
|
|
|
|
mean value: 0.014870595932006837
|
|
|
|
key: test_mcc
|
|
value: [0.37796447 0.75 0.73214286 1. 0.87287156 0.6000992
|
|
0.87287156 0.60714286 1. 0.64465837]
|
|
|
|
mean value: 0.7457750878552164
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.875 0.86666667 1. 0.93333333 0.8
|
|
0.93333333 0.8 1. 0.8 ]
|
|
|
|
mean value: 0.8695833333333334
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.875 0.85714286 1. 0.92307692 0.76923077
|
|
0.94117647 0.8 1. 0.84210526]
|
|
|
|
mean value: 0.8713614636137856
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.875 0.85714286 1. 1. 0.83333333
|
|
0.88888889 0.85714286 1. 0.72727273]
|
|
|
|
mean value: 0.870544733044733
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.875 0.85714286 1. 0.85714286 0.71428571
|
|
1. 0.75 1. 1. ]
|
|
|
|
mean value: 0.8803571428571428
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.875 0.86607143 1. 0.92857143 0.79464286
|
|
0.92857143 0.80357143 1. 0.78571429]
|
|
|
|
mean value: 0.8669642857142857
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.77777778 0.75 1. 0.85714286 0.625
|
|
0.88888889 0.66666667 1. 0.72727273]
|
|
|
|
mean value: 0.7838203463203464
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.76
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03263783 0.03535938 0.05209875 0.03529716 0.05700755 0.0501163
|
|
0.04600501 0.03978014 0.03974342 0.03964782]
|
|
|
|
mean value: 0.042769336700439455
|
|
|
|
key: score_time
|
|
value: [0.02034211 0.03128362 0.01724148 0.02669501 0.03721666 0.02389431
|
|
0.0299561 0.02407408 0.03020382 0.01743126]
|
|
|
|
mean value: 0.025833845138549805
|
|
|
|
key: test_mcc
|
|
value: [0.5 1. 0.875 0.75592895 0.75592895 0.73214286
|
|
0.87287156 0.76376262 0.875 0.47245559]
|
|
|
|
mean value: 0.7603090517211243
|
|
|
|
key: train_mcc
|
|
value: [0.98540068 1. 1. 0.98550418 0.98550418 0.98550418
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9941913214115115
|
|
|
|
key: test_accuracy
|
|
value: [0.75 1. 0.93333333 0.86666667 0.86666667 0.86666667
|
|
0.93333333 0.86666667 0.93333333 0.73333333]
|
|
|
|
mean value: 0.875
|
|
|
|
key: train_accuracy
|
|
value: [0.99264706 1. 1. 0.99270073 0.99270073 0.99270073
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9970749248604551
|
|
|
|
key: test_fscore
|
|
value: [0.75 1. 0.93333333 0.83333333 0.83333333 0.85714286
|
|
0.94117647 0.85714286 0.93333333 0.77777778]
|
|
|
|
mean value: 0.8716573295985061
|
|
|
|
key: train_fscore
|
|
value: [0.99270073 1. 1. 0.99280576 0.99280576 0.99280576
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9971117996114058
|
|
|
|
key: test_precision
|
|
value: [0.75 1. 0.875 1. 1. 0.85714286
|
|
0.88888889 1. 1. 0.7 ]
|
|
|
|
mean value: 0.9071031746031746
|
|
|
|
key: train_precision
|
|
value: [0.98550725 1. 1. 0.98571429 0.98571429 0.98571429
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9942650103519669
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 1. 0.71428571 0.71428571 0.85714286
|
|
1. 0.75 0.875 0.875 ]
|
|
|
|
mean value: 0.8535714285714285
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 1. 0.9375 0.85714286 0.85714286 0.86607143
|
|
0.92857143 0.875 0.9375 0.72321429]
|
|
|
|
mean value: 0.8732142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.99264706 1. 1. 0.99264706 0.99264706 0.99264706
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9970588235294118
|
|
|
|
key: test_jcc
|
|
value: [0.6 1. 0.875 0.71428571 0.71428571 0.75
|
|
0.88888889 0.75 0.875 0.63636364]
|
|
|
|
mean value: 0.7803823953823954
|
|
|
|
key: train_jcc
|
|
value: [0.98550725 1. 1. 0.98571429 0.98571429 0.98571429
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9942650103519669
|
|
|
|
MCC on Blind test: 0.76
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03225088 0.06091666 0.06057954 0.05072927 0.06359172 0.05203223
|
|
0.06740713 0.04722333 0.04715204 0.04933715]
|
|
|
|
mean value: 0.05312199592590332
|
|
|
|
key: score_time
|
|
value: [0.02155924 0.0206759 0.02430654 0.02199388 0.022012 0.02173805
|
|
0.02356458 0.02501893 0.02467561 0.02412295]
|
|
|
|
mean value: 0.022966766357421876
|
|
|
|
key: test_mcc
|
|
value: [0.25 0.37796447 0.6000992 0.46428571 0.18898224 0.47245559
|
|
0.32732684 0.37796447 0.19642857 0.76376262]
|
|
|
|
mean value: 0.40192697088278284
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.6875 0.8 0.73333333 0.6 0.73333333
|
|
0.66666667 0.66666667 0.6 0.86666667]
|
|
|
|
mean value: 0.6979166666666666
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.66666667 0.76923077 0.71428571 0.5 0.66666667
|
|
0.70588235 0.61538462 0.625 0.85714286]
|
|
|
|
mean value: 0.6745259642318466
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.625 0.71428571 0.83333333 0.71428571 0.6 0.8
|
|
0.66666667 0.8 0.625 1. ]
|
|
|
|
mean value: 0.7378571428571429
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.625 0.625 0.71428571 0.71428571 0.42857143 0.57142857
|
|
0.75 0.5 0.625 0.75 ]
|
|
|
|
mean value: 0.6303571428571428
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.6875 0.79464286 0.73214286 0.58928571 0.72321429
|
|
0.66071429 0.67857143 0.59821429 0.875 ]
|
|
|
|
mean value: 0.6964285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.5 0.625 0.55555556 0.33333333 0.5
|
|
0.54545455 0.44444444 0.45454545 0.75 ]
|
|
|
|
mean value: 0.5162878787878787
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.27443743 0.25558352 0.25779271 0.25894332 0.26575208 0.2592392
|
|
0.26178789 0.26615191 0.25864005 0.25382137]
|
|
|
|
mean value: 0.26121494770050047
|
|
|
|
key: score_time
|
|
value: [0.00925946 0.00919747 0.00927258 0.00988531 0.00969195 0.00914502
|
|
0.00992846 0.00929761 0.00923038 0.00910115]
|
|
|
|
mean value: 0.00940093994140625
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 1. 0.875 0.75592895 0.75592895 0.875
|
|
0.87287156 1. 1. 0.64465837]
|
|
|
|
mean value: 0.8409328612549894
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 1. 0.93333333 0.86666667 0.86666667 0.93333333
|
|
0.93333333 1. 1. 0.8 ]
|
|
|
|
mean value: 0.9145833333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 1. 0.93333333 0.83333333 0.83333333 0.93333333
|
|
0.94117647 1. 1. 0.84210526]
|
|
|
|
mean value: 0.9140144478844169
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.77777778 1. 0.875 1. 1. 0.875
|
|
0.88888889 1. 1. 0.72727273]
|
|
|
|
mean value: 0.9143939393939394
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 1. 1. 0.71428571 0.71428571 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9303571428571429
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 1. 0.9375 0.85714286 0.85714286 0.9375
|
|
0.92857143 1. 1. 0.78571429]
|
|
|
|
mean value: 0.9116071428571428
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.7 1. 0.875 0.71428571 0.71428571 0.875
|
|
0.88888889 1. 1. 0.72727273]
|
|
|
|
mean value: 0.8494733044733045
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.83
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01802087 0.0349052 0.11099482 0.04823613 0.05312109 0.03334522
|
|
0.03610134 0.01956463 0.01853085 0.04480076]
|
|
|
|
mean value: 0.04176208972930908
|
|
|
|
key: score_time
|
|
value: [0.01194596 0.01227379 0.01239729 0.01331639 0.01258588 0.01520538
|
|
0.01460433 0.0146873 0.03622794 0.02320099]
|
|
|
|
mean value: 0.0166445255279541
|
|
|
|
key: test_mcc
|
|
value: [0.48038446 0.57735027 0.66143783 0.56407607 0.76376262 0.875
|
|
0.53452248 0.75592895 0.53452248 0.41931393]
|
|
|
|
mean value: 0.6166299097371694
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.75 0.8 0.73333333 0.86666667 0.93333333
|
|
0.73333333 0.86666667 0.73333333 0.66666667]
|
|
|
|
mean value: 0.7770833333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.8 0.82352941 0.77777778 0.875 0.93333333
|
|
0.8 0.88888889 0.8 0.76190476]
|
|
|
|
mean value: 0.822233893557423
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.61538462 0.66666667 0.7 0.63636364 0.77777778 0.875
|
|
0.66666667 0.8 0.66666667 0.61538462]
|
|
|
|
mean value: 0.7019910644910645
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.75 0.8125 0.75 0.875 0.9375
|
|
0.71428571 0.85714286 0.71428571 0.64285714]
|
|
|
|
mean value: 0.7741071428571429
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.66666667 0.7 0.63636364 0.77777778 0.875
|
|
0.66666667 0.8 0.66666667 0.61538462]
|
|
|
|
mean value: 0.7019910644910645
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03271985 0.03269958 0.0327177 0.03668809 0.04283381 0.04647923
|
|
0.03452468 0.03292489 0.03290701 0.03314948]
|
|
|
|
mean value: 0.035764431953430174
|
|
|
|
key: score_time
|
|
value: [0.02335835 0.01643848 0.01999688 0.02087951 0.02671218 0.01170707
|
|
0.0223546 0.02274776 0.0231638 0.01162577]
|
|
|
|
mean value: 0.019898438453674318
|
|
|
|
key: test_mcc
|
|
value: [0.5 0.28867513 0.6000992 0.73214286 0.47245559 0.6000992
|
|
0.87287156 0.73214286 0.73214286 0.32732684]
|
|
|
|
mean value: 0.5857956089880835
|
|
|
|
key: train_mcc
|
|
value: [1. 0.98540068 0.97122151 0.98550725 0.97122151 0.98550725
|
|
0.97080136 0.95629932 0.95629932 0.97080136]
|
|
|
|
mean value: 0.9753059553605798
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.625 0.8 0.86666667 0.73333333 0.8
|
|
0.93333333 0.86666667 0.86666667 0.66666667]
|
|
|
|
mean value: 0.7908333333333334
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.99264706 0.98540146 0.99270073 0.98540146 0.99270073
|
|
0.98540146 0.97810219 0.97810219 0.98540146]
|
|
|
|
mean value: 0.9875858737655646
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.5 0.76923077 0.85714286 0.66666667 0.76923077
|
|
0.94117647 0.875 0.875 0.70588235]
|
|
|
|
mean value: 0.7709329885800473
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99259259 0.98529412 0.99270073 0.98529412 0.99270073
|
|
0.98529412 0.97777778 0.97777778 0.98529412]
|
|
|
|
mean value: 0.9874726078590399
|
|
|
|
key: test_precision
|
|
value: [0.75 0.75 0.83333333 0.85714286 0.8 0.83333333
|
|
0.88888889 0.875 0.875 0.66666667]
|
|
|
|
mean value: 0.812936507936508
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.98529412 0.98507463 0.98507463 0.98529412]
|
|
|
|
mean value: 0.9940737489025461
|
|
|
|
key: test_recall
|
|
value: [0.75 0.375 0.71428571 0.85714286 0.57142857 0.71428571
|
|
1. 0.875 0.875 0.75 ]
|
|
|
|
mean value: 0.7482142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 0.98529412 0.97101449 0.98550725 0.97101449 0.98550725
|
|
0.98529412 0.97058824 0.97058824 0.98529412]
|
|
|
|
mean value: 0.9810102301790281
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_7030.py:196: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_7030.py:199: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.625 0.79464286 0.86607143 0.72321429 0.79464286
|
|
0.92857143 0.86607143 0.86607143 0.66071429]
|
|
|
|
mean value: 0.7875
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99264706 0.98550725 0.99275362 0.98550725 0.99275362
|
|
0.98540068 0.97804774 0.97804774 0.98540068]
|
|
|
|
mean value: 0.9876065643648764
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.33333333 0.625 0.75 0.5 0.625
|
|
0.88888889 0.77777778 0.77777778 0.54545455]
|
|
|
|
mean value: 0.6423232323232323
|
|
|
|
key: train_jcc
|
|
value: [1. 0.98529412 0.97101449 0.98550725 0.97101449 0.98550725
|
|
0.97101449 0.95652174 0.95652174 0.97101449]
|
|
|
|
mean value: 0.9753410059676044
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.26444864 0.13244057 0.16870999 0.27039862 0.21492577 0.21021795
|
|
0.19610572 0.20581913 0.20611358 0.20624232]
|
|
|
|
mean value: 0.20754222869873046
|
|
|
|
key: score_time
|
|
value: [0.02808881 0.01255798 0.02138734 0.02434325 0.02104497 0.02034402
|
|
0.02206707 0.0220139 0.02156305 0.02118802]
|
|
|
|
mean value: 0.02145984172821045
|
|
|
|
key: test_mcc
|
|
value: [0.37796447 0.40451992 0.75592895 0.46428571 0.47245559 0.87287156
|
|
1. 0.87287156 0.73214286 0.32732684]
|
|
|
|
mean value: 0.628036745643766
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.98550418 1. 0.97122151 1.
|
|
1. 1. 1. 0.97080136]
|
|
|
|
mean value: 0.9927527053335676
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.6875 0.86666667 0.73333333 0.73333333 0.93333333
|
|
1. 0.93333333 0.86666667 0.66666667]
|
|
|
|
mean value: 0.8108333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.99270073 1. 0.98540146 1.
|
|
1. 1. 1. 0.98540146]
|
|
|
|
mean value: 0.9963503649635037
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.61538462 0.83333333 0.71428571 0.66666667 0.92307692
|
|
1. 0.94117647 0.875 0.70588235]
|
|
|
|
mean value: 0.7941472742943331
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.99280576 1. 0.98529412 1.
|
|
1. 1. 1. 0.98529412]
|
|
|
|
mean value: 0.9963393990689802
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.8 1. 0.71428571 0.8 1.
|
|
1. 0.88888889 0.875 0.66666667]
|
|
|
|
mean value: 0.8459126984126984
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.98571429 1. 1. 1.
|
|
1. 1. 1. 0.98529412]
|
|
|
|
mean value: 0.9971008403361344
|
|
|
|
key: test_recall
|
|
value: [0.625 0.5 0.71428571 0.71428571 0.57142857 0.85714286
|
|
1. 1. 0.875 0.75 ]
|
|
|
|
mean value: 0.7607142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 0.97101449 1.
|
|
1. 1. 1. 0.98529412]
|
|
|
|
mean value: 0.9956308610400681
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.6875 0.85714286 0.73214286 0.72321429 0.92857143
|
|
1. 0.92857143 0.86607143 0.66071429]
|
|
|
|
mean value: 0.8071428571428572
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.99264706 1. 0.98550725 1.
|
|
1. 1. 1. 0.98540068]
|
|
|
|
mean value: 0.9963554987212276
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.44444444 0.71428571 0.55555556 0.5 0.85714286
|
|
1. 0.88888889 0.77777778 0.54545455]
|
|
|
|
mean value: 0.6783549783549784
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.98571429 1. 0.97101449 1.
|
|
1. 1. 1. 0.97101449]
|
|
|
|
mean value: 0.9927743271221532
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.74
|