18962 lines
913 KiB
Text
18962 lines
913 KiB
Text
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_rt.py:550: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 424
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 424
|
|
ncols: 265
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 102
|
|
log10_or_mychisq 102
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 166
|
|
No. of categorical features: 7
|
|
|
|
index: 0
|
|
ind: 1
|
|
|
|
Mask count check: True
|
|
Original Data
|
|
Counter({0: 120, 1: 119}) Data dim: (239, 173)
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data: REVERSE training
|
|
imputed values: training set
|
|
actual values: blind test set
|
|
Train data size: (239, 173)
|
|
Test data size: (185, 173)
|
|
y_train numbers: Counter({0: 120, 1: 119})
|
|
y_train ratio: 1.0084033613445378
|
|
|
|
y_test_numbers: Counter({1: 114, 0: 71})
|
|
y_test ratio: 0.6228070175438597
|
|
-------------------------------------------------------------
|
|
Simple Random OverSampling
|
|
Counter({0: 120, 1: 120})
|
|
(240, 173)
|
|
Simple Random UnderSampling
|
|
Counter({0: 119, 1: 119})
|
|
(238, 173)
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 120, 1: 120})
|
|
(240, 173)
|
|
SMOTE_NC OverSampling
|
|
Counter({0: 120, 1: 120})
|
|
(240, 173)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis: REVERSE training
|
|
|
|
Gene name: pncA
|
|
Drug name: pyrazinamide
|
|
|
|
Output directory: /home/tanu/git/Data/pyrazinamide/output/ml/tts_rt/
|
|
|
|
Sanity checks:
|
|
Total input features: 173
|
|
|
|
Training data size: (239, 173)
|
|
Test data size: (185, 173)
|
|
|
|
Target feature numbers (training data): Counter({0: 120, 1: 119})
|
|
Target features ratio (training data: 1.0084033613445378
|
|
|
|
Target feature numbers (test data): Counter({1: 114, 0: 71})
|
|
Target features ratio (test data): 0.6228070175438597
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 34
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
These are:
|
|
['ALTS910101', 'AZAE970101', 'AZAE970102', 'BASU010101', 'BENS940101', 'BENS940102', 'BENS940103', 'BENS940104', 'BETM990101', 'BLAJ010101', 'BONM030101', 'BONM030102', 'BONM030103', 'BONM030104', 'BONM030105', 'BONM030106', 'BRYS930101', 'CROG050101', 'CSEM940101', 'DAYM780301', 'DAYM780302', 'DOSZ010101', 'DOSZ010102', 'DOSZ010103', 'DOSZ010104', 'FEND850101', 'FITW660101', 'GEOD900101', 'GIAG010101', 'GONG920101', 'GRAR740104', 'HENS920101', 'HENS920102', 'HENS920103', 'HENS920104', 'JOHM930101', 'JOND920103', 'JOND940101', 'KANM000101', 'KAPO950101', 'KESO980101', 'KESO980102', 'KOLA920101', 'KOLA930101', 'KOSJ950100_RSA_SST', 'KOSJ950100_SST', 'KOSJ950110_RSA', 'KOSJ950115', 'LEVJ860101', 'LINK010101', 'LIWA970101', 'LUTR910101', 'LUTR910102', 'LUTR910103', 'LUTR910104', 'LUTR910105', 'LUTR910106', 'LUTR910107', 'LUTR910108', 'LUTR910109', 'MCLA710101', 'MCLA720101', 'MEHP950102', 'MICC010101', 'MIRL960101', 'MIYS850102', 'MIYS850103', 'MIYS930101', 'MIYS960101', 'MIYS960102', 'MIYS960103', 'MIYS990106', 'MIYS990107', 'MIYT790101', 'MOHR870101', 'MOOG990101', 'MUET010101', 'MUET020101', 'MUET020102', 'NAOD960101', 'NGPC000101', 'NIEK910101', 'NIEK910102', 'OGAK980101', 'OVEJ920100_RSA', 'OVEJ920101', 'OVEJ920102', 'OVEJ920103', 'PRLA000101', 'PRLA000102', 'QUIB020101', 'QU_C930101', 'QU_C930102', 'QU_C930103', 'RIER950101', 'RISJ880101', 'RUSR970101', 'RUSR970102', 'RUSR970103', 'SIMK990101', 'SIMK990102', 'SIMK990103', 'SIMK990104', 'SIMK990105', 'SKOJ000101', 'SKOJ000102', 'SKOJ970101', 'TANS760101', 'TANS760102', 'THOP960101', 'TOBD000101', 'TOBD000102', 'TUDE900101', 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106']
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03394842 0.02895188 0.03221607 0.03293228 0.04543996 0.03000593
|
|
0.03111315 0.03325677 0.03014874 0.03099895]
|
|
|
|
mean value: 0.032901215553283694
|
|
|
|
key: score_time
|
|
value: [0.01245475 0.01199245 0.01205087 0.01204062 0.01212907 0.01196837
|
|
0.01194596 0.01205087 0.01186895 0.01187897]
|
|
|
|
mean value: 0.012038087844848633
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.75261781 0.64168895 0.58536941 0.60246408 0.53033009
|
|
0.58536941 0.6761234 0.83333333 0.56490196]
|
|
|
|
mean value: 0.6357567832334812
|
|
|
|
key: train_mcc
|
|
value: [0.77022946 0.81454556 0.79593084 0.78693949 0.82418184 0.80641659
|
|
0.80556067 0.78889274 0.78777764 0.80642024]
|
|
|
|
mean value: 0.7986895083179898
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.875 0.79166667 0.79166667 0.79166667 0.75
|
|
0.79166667 0.83333333 0.91666667 0.7826087 ]
|
|
|
|
mean value: 0.8115942028985507
|
|
|
|
key: train_accuracy
|
|
value: [0.88372093 0.90697674 0.89767442 0.89302326 0.91162791 0.90232558
|
|
0.90232558 0.89302326 0.89302326 0.90277778]
|
|
|
|
mean value: 0.8986498708010335
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.86956522 0.82758621 0.8 0.81481481 0.7
|
|
0.8 0.84615385 0.91666667 0.76190476]
|
|
|
|
mean value: 0.8119300209480119
|
|
|
|
key: train_fscore
|
|
value: [0.88789238 0.90825688 0.89908257 0.89497717 0.91324201 0.90497738
|
|
0.90410959 0.89686099 0.8959276 0.90497738]
|
|
|
|
mean value: 0.9010303932834448
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.90909091 0.70588235 0.76923077 0.73333333 0.875
|
|
0.76923077 0.78571429 0.91666667 0.8 ]
|
|
|
|
mean value: 0.8082330904389728
|
|
|
|
key: train_precision
|
|
value: [0.85344828 0.89189189 0.88288288 0.875 0.89285714 0.87719298
|
|
0.88392857 0.86206897 0.86842105 0.88495575]
|
|
|
|
mean value: 0.8772647517739908
|
|
|
|
key: test_recall
|
|
value: [0.75 0.83333333 1. 0.83333333 0.91666667 0.58333333
|
|
0.83333333 0.91666667 0.91666667 0.72727273]
|
|
|
|
mean value: 0.831060606060606
|
|
|
|
key: train_recall
|
|
value: [0.92523364 0.92523364 0.91588785 0.91588785 0.93457944 0.93457944
|
|
0.92523364 0.93457944 0.92523364 0.92592593]
|
|
|
|
mean value: 0.9262374524056767
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.875 0.79166667 0.79166667 0.79166667 0.75
|
|
0.79166667 0.83333333 0.91666667 0.78030303]
|
|
|
|
mean value: 0.8113636363636364
|
|
|
|
key: train_roc_auc
|
|
value: [0.88391312 0.90706127 0.89775874 0.89312911 0.91173416 0.9024749
|
|
0.90243164 0.89321565 0.89317238 0.90277778]
|
|
|
|
mean value: 0.8987668743509866
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.76923077 0.70588235 0.66666667 0.6875 0.53846154
|
|
0.66666667 0.73333333 0.84615385 0.61538462]
|
|
|
|
mean value: 0.6872136931695755
|
|
|
|
key: train_jcc
|
|
value: [0.7983871 0.83193277 0.81666667 0.80991736 0.84033613 0.82644628
|
|
0.825 0.81300813 0.81147541 0.82644628]
|
|
|
|
mean value: 0.8199616128276623
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.86901093 0.75784445 0.80631161 0.83643413 0.7593751 0.88770938
|
|
0.73867297 0.7577157 0.95151854 0.74281764]
|
|
|
|
mean value: 0.8107410430908203
|
|
|
|
key: score_time
|
|
value: [0.01209021 0.01205754 0.01216173 0.01274061 0.01207447 0.01208067
|
|
0.01207066 0.01211143 0.01211905 0.01203394]
|
|
|
|
mean value: 0.012154030799865722
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.75261781 0.64168895 0.58536941 0.53033009 0.53033009
|
|
0.58536941 0.6761234 0.6761234 0.65151515]
|
|
|
|
mean value: 0.621483710880162
|
|
|
|
key: train_mcc
|
|
value: [0.73420542 0.7780095 0.78777764 0.7587014 0.74008668 0.7802162
|
|
0.76913868 0.76913868 0.75938024 0.75158034]
|
|
|
|
mean value: 0.7628234782601825
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.875 0.79166667 0.79166667 0.75 0.75
|
|
0.79166667 0.83333333 0.83333333 0.82608696]
|
|
|
|
mean value: 0.8034420289855072
|
|
|
|
key: train_accuracy
|
|
value: [0.86511628 0.88837209 0.89302326 0.87906977 0.86976744 0.88837209
|
|
0.88372093 0.88372093 0.87906977 0.875 ]
|
|
|
|
mean value: 0.8805232558139535
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.86956522 0.82758621 0.8 0.78571429 0.7
|
|
0.8 0.84615385 0.84615385 0.81818182]
|
|
|
|
mean value: 0.8075963916143827
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.87111111 0.89090909 0.8959276 0.88073394 0.87155963 0.89285714
|
|
0.88687783 0.88687783 0.88181818 0.87892377]
|
|
|
|
mean value: 0.8837596129411874
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.90909091 0.70588235 0.76923077 0.6875 0.875
|
|
0.76923077 0.78571429 0.78571429 0.81818182]
|
|
|
|
mean value: 0.7923727008285832
|
|
|
|
key: train_precision
|
|
value: [0.83050847 0.86725664 0.86842105 0.86486486 0.85585586 0.85470085
|
|
0.85964912 0.85964912 0.85840708 0.85217391]
|
|
|
|
mean value: 0.8571486978101098
|
|
|
|
key: test_recall
|
|
value: [0.75 0.83333333 1. 0.83333333 0.91666667 0.58333333
|
|
0.83333333 0.91666667 0.91666667 0.81818182]
|
|
|
|
mean value: 0.8401515151515152
|
|
|
|
key: train_recall
|
|
value: [0.91588785 0.91588785 0.92523364 0.89719626 0.88785047 0.93457944
|
|
0.91588785 0.91588785 0.90654206 0.90740741]
|
|
|
|
mean value: 0.9122360678435445
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.875 0.79166667 0.79166667 0.75 0.75
|
|
0.79166667 0.83333333 0.83333333 0.82575758]
|
|
|
|
mean value: 0.803409090909091
|
|
|
|
key: train_roc_auc
|
|
value: [0.86535133 0.88849948 0.89317238 0.87915369 0.86985116 0.88858602
|
|
0.88386985 0.88386985 0.87919695 0.875 ]
|
|
|
|
mean value: 0.8806550709588092
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.76923077 0.70588235 0.66666667 0.64705882 0.53846154
|
|
0.66666667 0.73333333 0.73333333 0.69230769]
|
|
|
|
mean value: 0.679579831932773
|
|
|
|
key: train_jcc
|
|
value: [0.77165354 0.80327869 0.81147541 0.78688525 0.77235772 0.80645161
|
|
0.79674797 0.79674797 0.78861789 0.784 ]
|
|
|
|
mean value: 0.7918216045188055
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01307511 0.01253748 0.00938439 0.00916386 0.00911689 0.01020527
|
|
0.01011682 0.01027584 0.01017213 0.01063657]
|
|
|
|
mean value: 0.010468435287475587
|
|
|
|
key: score_time
|
|
value: [0.01179338 0.01028872 0.0090909 0.00874138 0.0089705 0.00970244
|
|
0.00955105 0.00958204 0.00964665 0.00971246]
|
|
|
|
mean value: 0.009707951545715332
|
|
|
|
key: test_mcc
|
|
value: [ 0.38490018 0.43033148 -0.2236068 0.35355339 0.64168895 0.60246408
|
|
0.1767767 0.70710678 0.60246408 0.58930667]
|
|
|
|
mean value: 0.426498549835703
|
|
|
|
key: train_mcc
|
|
value: [0.51291722 0.51210342 0.54948685 0.50693341 0.49306533 0.50903165
|
|
0.4861266 0.48500475 0.4889469 0.50251891]
|
|
|
|
mean value: 0.5046135029902341
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.70833333 0.41666667 0.66666667 0.79166667 0.79166667
|
|
0.58333333 0.83333333 0.79166667 0.7826087 ]
|
|
|
|
mean value: 0.7032608695652174
|
|
|
|
key: train_accuracy
|
|
value: [0.73953488 0.72093023 0.7627907 0.74883721 0.73023256 0.73488372
|
|
0.73023256 0.7255814 0.7255814 0.73148148]
|
|
|
|
mean value: 0.7350086132644272
|
|
|
|
key: test_fscore
|
|
value: [0.73333333 0.74074074 0.5625 0.71428571 0.82758621 0.81481481
|
|
0.64285714 0.85714286 0.81481481 0.8 ]
|
|
|
|
mean value: 0.750807562488597
|
|
|
|
key: train_fscore
|
|
value: [0.77777778 0.7761194 0.79183673 0.76923077 0.76984127 0.77647059
|
|
0.76612903 0.76679842 0.76862745 0.7751938 ]
|
|
|
|
mean value: 0.7738025243424465
|
|
|
|
key: test_precision
|
|
value: [0.61111111 0.66666667 0.45 0.625 0.70588235 0.73333333
|
|
0.5625 0.75 0.73333333 0.71428571]
|
|
|
|
mean value: 0.6552112511671335
|
|
|
|
key: train_precision
|
|
value: [0.67586207 0.64596273 0.70289855 0.70866142 0.66896552 0.66891892
|
|
0.67375887 0.66438356 0.66216216 0.66666667]
|
|
|
|
mean value: 0.6738240461813434
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.83333333 0.75 0.83333333 1. 0.91666667
|
|
0.75 1. 0.91666667 0.90909091]
|
|
|
|
mean value: 0.8825757575757576
|
|
|
|
key: train_recall
|
|
value: [0.91588785 0.97196262 0.90654206 0.8411215 0.90654206 0.92523364
|
|
0.88785047 0.90654206 0.91588785 0.92592593]
|
|
|
|
mean value: 0.910349601938387
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.70833333 0.41666667 0.66666667 0.79166667 0.79166667
|
|
0.58333333 0.83333333 0.79166667 0.78787879]
|
|
|
|
mean value: 0.7037878787878789
|
|
|
|
key: train_roc_auc
|
|
value: [0.74035133 0.72209242 0.76345621 0.74926445 0.73104881 0.73576497
|
|
0.73096227 0.72641918 0.72646244 0.73148148]
|
|
|
|
mean value: 0.735730356524749
|
|
|
|
key: test_jcc
|
|
value: [0.57894737 0.58823529 0.39130435 0.55555556 0.70588235 0.6875
|
|
0.47368421 0.75 0.6875 0.66666667]
|
|
|
|
mean value: 0.6085275796054501
|
|
|
|
key: train_jcc
|
|
value: [0.63636364 0.63414634 0.65540541 0.625 0.62580645 0.63461538
|
|
0.62091503 0.62179487 0.62420382 0.63291139]
|
|
|
|
mean value: 0.6311162337996469
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01122594 0.01032996 0.0103538 0.01023841 0.01048398 0.01038909
|
|
0.01046562 0.0103941 0.00965381 0.00983214]
|
|
|
|
mean value: 0.010336685180664062
|
|
|
|
key: score_time
|
|
value: [0.00994563 0.00961161 0.00965428 0.00908828 0.00969028 0.00971985
|
|
0.00960493 0.00939488 0.00967574 0.00929523]
|
|
|
|
mean value: 0.009568071365356446
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.35355339 0. 0.50709255 0.33333333 0.58536941
|
|
0.25819889 0.66666667 0.84515425 0.82575758]
|
|
|
|
mean value: 0.49604954776735644
|
|
|
|
key: train_mcc
|
|
value: [0.59116891 0.60929387 0.63724472 0.64842315 0.66084467 0.61888689
|
|
0.67521245 0.6335132 0.62092317 0.6049981 ]
|
|
|
|
mean value: 0.6300509127566093
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.66666667 0.5 0.75 0.66666667 0.79166667
|
|
0.625 0.83333333 0.91666667 0.91304348]
|
|
|
|
mean value: 0.7454710144927537
|
|
|
|
key: train_accuracy
|
|
value: [0.79534884 0.80465116 0.81860465 0.82325581 0.82790698 0.80930233
|
|
0.8372093 0.81395349 0.80930233 0.80092593]
|
|
|
|
mean value: 0.8140460809646857
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.6 0.53846154 0.76923077 0.66666667 0.8
|
|
0.66666667 0.83333333 0.92307692 0.90909091]
|
|
|
|
mean value: 0.748913550217898
|
|
|
|
key: train_fscore
|
|
value: [0.79816514 0.80373832 0.81860465 0.82882883 0.83700441 0.81105991
|
|
0.84018265 0.8245614 0.8161435 0.81057269]
|
|
|
|
mean value: 0.8188861485376868
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.75 0.5 0.71428571 0.66666667 0.76923077
|
|
0.6 0.83333333 0.85714286 0.90909091]
|
|
|
|
mean value: 0.7417932067932068
|
|
|
|
key: train_precision
|
|
value: [0.78378378 0.80373832 0.81481481 0.8 0.79166667 0.8
|
|
0.82142857 0.7768595 0.78448276 0.77310924]
|
|
|
|
mean value: 0.7949883660901246
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.58333333 0.83333333 0.66666667 0.83333333
|
|
0.75 0.83333333 1. 0.90909091]
|
|
|
|
mean value: 0.7659090909090909
|
|
|
|
key: train_recall
|
|
value: [0.81308411 0.80373832 0.82242991 0.85981308 0.88785047 0.82242991
|
|
0.85981308 0.87850467 0.85046729 0.85185185]
|
|
|
|
mean value: 0.8449982692973347
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.66666667 0.5 0.75 0.66666667 0.79166667
|
|
0.625 0.83333333 0.91666667 0.91287879]
|
|
|
|
mean value: 0.7454545454545455
|
|
|
|
key: train_roc_auc
|
|
value: [0.79543094 0.80464694 0.81862236 0.82342506 0.82818449 0.8093631
|
|
0.83731395 0.81425234 0.8094929 0.80092593]
|
|
|
|
mean value: 0.814165801315334
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.42857143 0.36842105 0.625 0.5 0.66666667
|
|
0.5 0.71428571 0.85714286 0.83333333]
|
|
|
|
mean value: 0.6136278195488721
|
|
|
|
key: train_jcc
|
|
value: [0.66412214 0.671875 0.69291339 0.70769231 0.71969697 0.68217054
|
|
0.72440945 0.70149254 0.68939394 0.68148148]
|
|
|
|
mean value: 0.693524775026404
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01054907 0.01217103 0.00988889 0.00933909 0.00983024 0.00909495
|
|
0.00927186 0.00978088 0.00950527 0.00981116]
|
|
|
|
mean value: 0.00992424488067627
|
|
|
|
key: score_time
|
|
value: [0.05331373 0.02406383 0.01151419 0.01075935 0.01080298 0.01084328
|
|
0.01075506 0.01048541 0.01077724 0.01094627]
|
|
|
|
mean value: 0.01642613410949707
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.58536941 0.09166985 0.16903085 0.3380617 0.91986621
|
|
0.1767767 0.3380617 0.58536941 0.48075018]
|
|
|
|
mean value: 0.42703254071232083
|
|
|
|
key: train_mcc
|
|
value: [0.61642079 0.60464892 0.60225989 0.65942846 0.59461381 0.60464892
|
|
0.6335132 0.59600656 0.62203998 0.61491869]
|
|
|
|
mean value: 0.6148499207645965
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.79166667 0.54166667 0.58333333 0.66666667 0.95833333
|
|
0.58333333 0.66666667 0.79166667 0.73913043]
|
|
|
|
mean value: 0.7114130434782608
|
|
|
|
key: train_accuracy
|
|
value: [0.80465116 0.8 0.8 0.82790698 0.79534884 0.8
|
|
0.81395349 0.79534884 0.80930233 0.80555556]
|
|
|
|
mean value: 0.8052067183462532
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.8 0.62068966 0.61538462 0.69230769 0.95652174
|
|
0.64285714 0.69230769 0.8 0.7 ]
|
|
|
|
mean value: 0.7302677232812166
|
|
|
|
key: train_fscore
|
|
value: [0.8173913 0.81057269 0.80717489 0.83555556 0.80530973 0.81057269
|
|
0.8245614 0.80701754 0.81777778 0.81578947]
|
|
|
|
mean value: 0.8151723055588781
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.76923077 0.52941176 0.57142857 0.64285714 1.
|
|
0.5625 0.64285714 0.76923077 0.77777778]
|
|
|
|
mean value: 0.7083475756269875
|
|
|
|
key: train_precision
|
|
value: [0.76422764 0.76666667 0.77586207 0.79661017 0.76470588 0.76666667
|
|
0.7768595 0.76033058 0.77966102 0.775 ]
|
|
|
|
mean value: 0.7726590196013521
|
|
|
|
key: test_recall
|
|
value: [0.75 0.83333333 0.75 0.66666667 0.75 0.91666667
|
|
0.75 0.75 0.83333333 0.63636364]
|
|
|
|
mean value: 0.7636363636363637
|
|
|
|
key: train_recall
|
|
value: [0.87850467 0.85981308 0.8411215 0.87850467 0.85046729 0.85981308
|
|
0.87850467 0.85981308 0.85981308 0.86111111]
|
|
|
|
mean value: 0.8627466251298027
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.79166667 0.54166667 0.58333333 0.66666667 0.95833333
|
|
0.58333333 0.66666667 0.79166667 0.73484848]
|
|
|
|
mean value: 0.7109848484848484
|
|
|
|
key: train_roc_auc
|
|
value: [0.80499308 0.80027691 0.80019038 0.82814123 0.79560402 0.80027691
|
|
0.81425234 0.79564728 0.80953617 0.80555556]
|
|
|
|
mean value: 0.8054473866389754
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.66666667 0.45 0.44444444 0.52941176 0.91666667
|
|
0.47368421 0.52941176 0.66666667 0.53846154]
|
|
|
|
mean value: 0.5858270865701206
|
|
|
|
key: train_jcc
|
|
value: [0.69117647 0.68148148 0.67669173 0.71755725 0.67407407 0.68148148
|
|
0.70149254 0.67647059 0.69172932 0.68888889]
|
|
|
|
mean value: 0.6881043826602864
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01442719 0.01194906 0.0118742 0.01261497 0.0117321 0.01312518
|
|
0.01234365 0.011832 0.01180959 0.01206326]
|
|
|
|
mean value: 0.012377119064331055
|
|
|
|
key: score_time
|
|
value: [0.00988269 0.00954556 0.00970721 0.01022983 0.00949931 0.01051331
|
|
0.00969958 0.00951052 0.00961781 0.00963497]
|
|
|
|
mean value: 0.009784078598022461
|
|
|
|
key: test_mcc
|
|
value: [0.83333333 0.66666667 0.45834925 0.41812101 0.43033148 0.6761234
|
|
0.50709255 0.6761234 0.6761234 0.74242424]
|
|
|
|
mean value: 0.6084688743039378
|
|
|
|
key: train_mcc
|
|
value: [0.7751614 0.76916509 0.80755603 0.78777764 0.83492613 0.80044837
|
|
0.77323619 0.75638496 0.79889412 0.7741473 ]
|
|
|
|
mean value: 0.7877697215690955
|
|
|
|
key: test_accuracy
|
|
value: [0.91666667 0.83333333 0.70833333 0.70833333 0.70833333 0.83333333
|
|
0.75 0.83333333 0.83333333 0.86956522]
|
|
|
|
mean value: 0.7994565217391305
|
|
|
|
key: train_accuracy
|
|
value: [0.88372093 0.87906977 0.90232558 0.89302326 0.91627907 0.89767442
|
|
0.88372093 0.8744186 0.89767442 0.88425926]
|
|
|
|
mean value: 0.8912166236003445
|
|
|
|
key: test_fscore
|
|
value: [0.91666667 0.83333333 0.75862069 0.72 0.74074074 0.81818182
|
|
0.76923077 0.84615385 0.84615385 0.86956522]
|
|
|
|
mean value: 0.8118646927507497
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[0.89082969 0.88793103 0.9058296 0.8959276 0.91891892 0.90265487
|
|
0.88986784 0.88209607 0.90178571 0.89082969]
|
|
|
|
mean value: 0.8966671033091514
|
|
|
|
key: test_precision
|
|
value: [0.91666667 0.83333333 0.64705882 0.69230769 0.66666667 0.9
|
|
0.71428571 0.78571429 0.78571429 0.83333333]
|
|
|
|
mean value: 0.777508080155139
|
|
|
|
key: train_precision
|
|
value: [0.83606557 0.824 0.87068966 0.86842105 0.88695652 0.85714286
|
|
0.84166667 0.82786885 0.86324786 0.84297521]
|
|
|
|
mean value: 0.8519034249441588
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.83333333 0.91666667 0.75 0.83333333 0.75
|
|
0.83333333 0.91666667 0.91666667 0.90909091]
|
|
|
|
mean value: 0.8575757575757575
|
|
|
|
key: train_recall
|
|
value: [0.95327103 0.96261682 0.94392523 0.92523364 0.95327103 0.95327103
|
|
0.94392523 0.94392523 0.94392523 0.94444444]
|
|
|
|
mean value: 0.9467808930425753
|
|
|
|
key: test_roc_auc
|
|
value: [0.91666667 0.83333333 0.70833333 0.70833333 0.70833333 0.83333333
|
|
0.75 0.83333333 0.83333333 0.87121212]
|
|
|
|
mean value: 0.7996212121212122
|
|
|
|
key: train_roc_auc
|
|
value: [0.88404292 0.87945656 0.90251817 0.89317238 0.91645033 0.89793181
|
|
0.88399965 0.87474039 0.89788854 0.88425926]
|
|
|
|
mean value: 0.8914460020768432
|
|
|
|
key: test_jcc
|
|
value: [0.84615385 0.71428571 0.61111111 0.5625 0.58823529 0.69230769
|
|
0.625 0.73333333 0.73333333 0.76923077]
|
|
|
|
mean value: 0.6875491093873447
|
|
|
|
key: train_jcc
|
|
value: [0.80314961 0.79844961 0.82786885 0.81147541 0.85 0.82258065
|
|
0.8015873 0.7890625 0.82113821 0.80314961]
|
|
|
|
mean value: 0.8128461745427313
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.16637278 0.92700911 0.56351781 0.95615482 0.75952268 0.51315856
|
|
0.87010455 0.76887608 0.95761991 1.15136218]
|
|
|
|
mean value: 0.8633698463439942
|
|
|
|
key: score_time
|
|
value: [0.01427674 0.01241374 0.01237249 0.01237464 0.01255679 0.01212168
|
|
0.0123601 0.01237512 0.0270009 0.01376033]
|
|
|
|
mean value: 0.014161252975463867
|
|
|
|
key: test_mcc
|
|
value: [0.66666667 0.53033009 0.64168895 0.66666667 0.50709255 0.6761234
|
|
0.60246408 0.6761234 0.84515425 0.48075018]
|
|
|
|
mean value: 0.6293060233723498
|
|
|
|
key: train_mcc
|
|
value: [0.94484861 0.95386483 0.85115957 0.92557979 0.82827515 0.87038973
|
|
0.83961263 0.94418484 0.91953574 0.95407186]
|
|
|
|
mean value: 0.9031522737284948
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.75 0.79166667 0.83333333 0.75 0.83333333
|
|
0.79166667 0.83333333 0.91666667 0.73913043]
|
|
|
|
mean value: 0.8072463768115942
|
|
|
|
key: train_accuracy
|
|
value: [0.97209302 0.97674419 0.9255814 0.9627907 0.90697674 0.93488372
|
|
0.91627907 0.97209302 0.95813953 0.97685185]
|
|
|
|
mean value: 0.9502433247200689
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.7 0.82758621 0.83333333 0.72727273 0.81818182
|
|
0.76190476 0.84615385 0.92307692 0.7 ]
|
|
|
|
mean value: 0.7970842950153295
|
|
|
|
key: train_fscore
|
|
value: [0.97247706 0.97695853 0.92523364 0.96261682 0.89690722 0.93577982
|
|
0.91 0.97196262 0.95964126 0.97716895]
|
|
|
|
mean value: 0.9488745912063633
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.875 0.70588235 0.83333333 0.8 0.9
|
|
0.88888889 0.78571429 0.85714286 0.77777778]
|
|
|
|
mean value: 0.8257072829131653
|
|
|
|
key: train_precision
|
|
value: [0.95495495 0.96363636 0.92523364 0.96261682 1. 0.91891892
|
|
0.97849462 0.97196262 0.92241379 0.96396396]
|
|
|
|
mean value: 0.9562195702345714
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.58333333 1. 0.83333333 0.66666667 0.75
|
|
0.66666667 0.91666667 1. 0.63636364]
|
|
|
|
mean value: 0.7886363636363636
|
|
|
|
key: train_recall
|
|
value: [0.99065421 0.99065421 0.92523364 0.96261682 0.81308411 0.95327103
|
|
0.85046729 0.97196262 1. 0.99074074]
|
|
|
|
mean value: 0.9448684665974385
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.75 0.79166667 0.83333333 0.75 0.83333333
|
|
0.79166667 0.83333333 0.91666667 0.73484848]
|
|
|
|
mean value: 0.8068181818181819
|
|
|
|
key: train_roc_auc
|
|
value: [0.97217895 0.97680858 0.92557979 0.96278989 0.90654206 0.93496885
|
|
0.91597439 0.97209242 0.95833333 0.97685185]
|
|
|
|
mean value: 0.9502120110764971
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.53846154 0.70588235 0.71428571 0.57142857 0.69230769
|
|
0.61538462 0.73333333 0.85714286 0.53846154]
|
|
|
|
mean value: 0.6680973928032752
|
|
|
|
key: train_jcc
|
|
value: [0.94642857 0.95495495 0.86086957 0.92792793 0.81308411 0.87931034
|
|
0.83486239 0.94545455 0.92241379 0.95535714]
|
|
|
|
mean value: 0.9040663343242202
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02159524 0.01609659 0.01578259 0.01616478 0.01528406 0.01571155
|
|
0.01467395 0.01572943 0.01593328 0.01627374]
|
|
|
|
mean value: 0.016324520111083984
|
|
|
|
key: score_time
|
|
value: [0.01183963 0.00899029 0.00861549 0.00864005 0.00864673 0.00881767
|
|
0.00853395 0.00855303 0.00865746 0.00861073]
|
|
|
|
mean value: 0.00899050235748291
|
|
|
|
key: test_mcc
|
|
value: [ 0.58536941 0.45834925 -0.0836242 0.50709255 0.1767767 0.3380617
|
|
0.58536941 0.25819889 0.50709255 0.56490196]
|
|
|
|
mean value: 0.3897588209443773
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.70833333 0.45833333 0.75 0.58333333 0.66666667
|
|
0.79166667 0.625 0.75 0.7826087 ]
|
|
|
|
mean value: 0.6907608695652174
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.63157895 0.48 0.76923077 0.64285714 0.63636364
|
|
0.8 0.66666667 0.76923077 0.76190476]
|
|
|
|
mean value: 0.6940441389274341
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.85714286 0.46153846 0.71428571 0.5625 0.7
|
|
0.76923077 0.6 0.71428571 0.8 ]
|
|
|
|
mean value: 0.6997165334665335
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.5 0.83333333 0.75 0.58333333
|
|
0.83333333 0.75 0.83333333 0.72727273]
|
|
|
|
mean value: 0.706060606060606
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.70833333 0.45833333 0.75 0.58333333 0.66666667
|
|
0.79166667 0.625 0.75 0.78030303]
|
|
|
|
mean value: 0.690530303030303
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.46153846 0.31578947 0.625 0.47368421 0.46666667
|
|
0.66666667 0.5 0.625 0.61538462]
|
|
|
|
mean value: 0.5392587237324079
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.10004592 0.09774756 0.0969007 0.0985167 0.09694242 0.09730864
|
|
0.09703588 0.09811378 0.09755826 0.09992361]
|
|
|
|
mean value: 0.09800934791564941
|
|
|
|
key: score_time
|
|
value: [0.0174191 0.01727986 0.01732039 0.01732492 0.01718807 0.0171814
|
|
0.01742411 0.01737833 0.01726508 0.01750612]
|
|
|
|
mean value: 0.017328739166259766
|
|
|
|
key: test_mcc
|
|
value: [0.75261781 0.50709255 0.35355339 0.50709255 0.41812101 0.60246408
|
|
0.43033148 0.6761234 0.75261781 0.39393939]
|
|
|
|
mean value: 0.5393953475994654
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.75 0.66666667 0.75 0.70833333 0.79166667
|
|
0.70833333 0.83333333 0.875 0.69565217]
|
|
|
|
mean value: 0.7653985507246377
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.72727273 0.71428571 0.76923077 0.72 0.76190476
|
|
0.74074074 0.84615385 0.88 0.69565217]
|
|
|
|
mean value: 0.7724805950892907
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.8 0.625 0.71428571 0.69230769 0.88888889
|
|
0.66666667 0.78571429 0.84615385 0.66666667]
|
|
|
|
mean value: 0.759477466977467
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.66666667 0.83333333 0.83333333 0.75 0.66666667
|
|
0.83333333 0.91666667 0.91666667 0.72727273]
|
|
|
|
mean value: 0.7977272727272727
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.75 0.66666667 0.75 0.70833333 0.79166667
|
|
0.70833333 0.83333333 0.875 0.6969697 ]
|
|
|
|
mean value: 0.7655303030303031
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.57142857 0.55555556 0.625 0.5625 0.61538462
|
|
0.58823529 0.73333333 0.78571429 0.53333333]
|
|
|
|
mean value: 0.6339715758098111
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00928187 0.00908732 0.0090332 0.00918388 0.01034474 0.00947237
|
|
0.0090425 0.00902772 0.00928903 0.00906014]
|
|
|
|
mean value: 0.009282279014587402
|
|
|
|
key: score_time
|
|
value: [0.0086329 0.00856829 0.00864267 0.00907159 0.00937819 0.00861835
|
|
0.00854588 0.00861311 0.00856185 0.00865269]
|
|
|
|
mean value: 0.008728551864624023
|
|
|
|
key: test_mcc
|
|
value: [ 0.3380617 0.25819889 0.0836242 0.33333333 0. 0.3380617
|
|
-0.16903085 0.33333333 0.41812101 0.21452908]
|
|
|
|
mean value: 0.21482323978354503
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.625 0.54166667 0.66666667 0.5 0.66666667
|
|
0.41666667 0.66666667 0.70833333 0.60869565]
|
|
|
|
mean value: 0.6067028985507247
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.63636364 0.57142857 0.56 0.66666667 0.53846154 0.63636364
|
|
0.36363636 0.66666667 0.72 0.52631579]
|
|
|
|
mean value: 0.5885902869060764
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.7 0.66666667 0.53846154 0.66666667 0.5 0.7
|
|
0.4 0.66666667 0.69230769 0.625 ]
|
|
|
|
mean value: 0.615576923076923
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.58333333 0.5 0.58333333 0.66666667 0.58333333 0.58333333
|
|
0.33333333 0.66666667 0.75 0.45454545]
|
|
|
|
mean value: 0.5704545454545454
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.625 0.54166667 0.66666667 0.5 0.66666667
|
|
0.41666667 0.66666667 0.70833333 0.60227273]
|
|
|
|
mean value: 0.6060606060606061
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.46666667 0.4 0.38888889 0.5 0.36842105 0.46666667
|
|
0.22222222 0.5 0.5625 0.35714286]
|
|
|
|
mean value: 0.42325083542188807
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.57
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.30548859 1.33230543 1.29387045 1.30024052 1.30000615 1.29675961
|
|
1.29720092 1.37435007 1.34458041 1.36174321]
|
|
|
|
mean value: 1.320654535293579
|
|
|
|
key: score_time
|
|
value: [0.08945274 0.08969522 0.08928704 0.14881968 0.08952141 0.08939528
|
|
0.08970332 0.09751368 0.09607148 0.09696031]
|
|
|
|
mean value: 0.09764201641082763
|
|
|
|
key: test_mcc
|
|
value: [0.6761234 0.57735027 0.60246408 0.66666667 0.6761234 0.64168895
|
|
0.41812101 0.53033009 0.75261781 0.38932432]
|
|
|
|
mean value: 0.5930809987870407
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.75 0.79166667 0.83333333 0.83333333 0.79166667
|
|
0.70833333 0.75 0.875 0.69565217]
|
|
|
|
mean value: 0.7862318840579711
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.66666667 0.81481481 0.83333333 0.84615385 0.73684211
|
|
0.72 0.78571429 0.88 0.66666667]
|
|
|
|
mean value: 0.7768373536794589
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9 1. 0.73333333 0.83333333 0.78571429 1.
|
|
0.69230769 0.6875 0.84615385 0.7 ]
|
|
|
|
mean value: 0.8178342490842491
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.91666667 0.83333333 0.91666667 0.58333333
|
|
0.75 0.91666667 0.91666667 0.63636364]
|
|
|
|
mean value: 0.771969696969697
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.75 0.79166667 0.83333333 0.83333333 0.79166667
|
|
0.70833333 0.75 0.875 0.69318182]
|
|
|
|
mean value: 0.7859848484848485
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.5 0.6875 0.71428571 0.73333333 0.58333333
|
|
0.5625 0.64705882 0.78571429 0.5 ]
|
|
|
|
mean value: 0.6406033182503771
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
key: fit_time
|
|
value: [1.73757911 0.88143897 0.87742066 0.89499593 1.00112581 0.90873289
|
|
0.8691864 0.89679146 0.90489745 0.89780354]
|
|
|
|
mean value: 0.9869972229003906
|
|
|
|
key: score_time
|
|
value: [0.25257754 0.21717262 0.16878605 0.22824645 0.13930917 0.22713327
|
|
0.25927711 0.24127674 0.2348907 0.26814222]
|
|
|
|
mean value: 0.2236811876296997
|
|
|
|
key: test_mcc
|
|
value: [0.6761234 0.45834925 0.60246408 0.75261781 0.6761234 0.64168895
|
|
0.41812101 0.60246408 0.66666667 0.47727273]
|
|
|
|
mean value: 0.597189136410222
|
|
|
|
key: train_mcc
|
|
value: [0.92574643 0.89803517 0.90713977 0.89803517 0.92623389 0.89803517
|
|
0.89803517 0.91632053 0.90713977 0.91702052]
|
|
|
|
mean value: 0.9091741604771293
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.70833333 0.79166667 0.875 0.83333333 0.79166667
|
|
0.70833333 0.79166667 0.83333333 0.73913043]
|
|
|
|
mean value: 0.7905797101449276
|
|
|
|
key: train_accuracy
|
|
value: [0.9627907 0.94883721 0.95348837 0.94883721 0.9627907 0.94883721
|
|
0.94883721 0.95813953 0.95348837 0.95833333]
|
|
|
|
mean value: 0.954437984496124
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.63157895 0.81481481 0.88 0.84615385 0.73684211
|
|
0.69565217 0.81481481 0.83333333 0.72727273]
|
|
|
|
mean value: 0.7798644581115977
|
|
|
|
key: train_fscore
|
|
value: [0.96296296 0.94930876 0.9537037 0.94930876 0.96330275 0.94930876
|
|
0.94930876 0.95813953 0.9537037 0.95890411]
|
|
|
|
mean value: 0.9547951790178185
|
|
|
|
key: test_precision
|
|
value: [0.9 0.85714286 0.73333333 0.84615385 0.78571429 1.
|
|
0.72727273 0.73333333 0.83333333 0.72727273]
|
|
|
|
mean value: 0.8143556443556443
|
|
|
|
key: train_precision
|
|
value: [0.95412844 0.93636364 0.94495413 0.93636364 0.94594595 0.93636364
|
|
0.93636364 0.9537037 0.94495413 0.94594595]
|
|
|
|
mean value: 0.9435086838297848
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.91666667 0.91666667 0.91666667 0.58333333
|
|
0.66666667 0.91666667 0.83333333 0.72727273]
|
|
|
|
mean value: 0.7727272727272727
|
|
|
|
key: train_recall
|
|
value: [0.97196262 0.96261682 0.96261682 0.96261682 0.98130841 0.96261682
|
|
0.96261682 0.96261682 0.96261682 0.97222222]
|
|
|
|
mean value: 0.9663811007268951
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.70833333 0.79166667 0.875 0.83333333 0.79166667
|
|
0.70833333 0.79166667 0.83333333 0.73863636]
|
|
|
|
mean value: 0.790530303030303
|
|
|
|
key: train_roc_auc
|
|
value: [0.96283316 0.948901 0.95353063 0.948901 0.96287643 0.948901
|
|
0.948901 0.95816026 0.95353063 0.95833333]
|
|
|
|
mean value: 0.9544868466597439
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.46153846 0.6875 0.78571429 0.73333333 0.58333333
|
|
0.53333333 0.6875 0.71428571 0.57142857]
|
|
|
|
mean value: 0.6450274725274725
|
|
|
|
key: train_jcc
|
|
value: [0.92857143 0.90350877 0.91150442 0.90350877 0.92920354 0.90350877
|
|
0.90350877 0.91964286 0.91150442 0.92105263]
|
|
|
|
mean value: 0.9135514394393063
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02658057 0.01026082 0.01046276 0.01046801 0.01056957 0.01058936
|
|
0.00954437 0.00941682 0.00935864 0.00949931]
|
|
|
|
mean value: 0.011675024032592773
|
|
|
|
key: score_time
|
|
value: [0.01053667 0.00951457 0.00990701 0.00974226 0.0097394 0.00976729
|
|
0.00901318 0.00893641 0.008919 0.00859046]
|
|
|
|
mean value: 0.00946662425994873
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.35355339 0. 0.50709255 0.33333333 0.58536941
|
|
0.25819889 0.66666667 0.84515425 0.82575758]
|
|
|
|
mean value: 0.49604954776735644
|
|
|
|
key: train_mcc
|
|
value: [0.59116891 0.60929387 0.63724472 0.64842315 0.66084467 0.61888689
|
|
0.67521245 0.6335132 0.62092317 0.6049981 ]
|
|
|
|
mean value: 0.6300509127566093
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.66666667 0.5 0.75 0.66666667 0.79166667
|
|
0.625 0.83333333 0.91666667 0.91304348]
|
|
|
|
mean value: 0.7454710144927537
|
|
|
|
key: train_accuracy
|
|
value: [0.79534884 0.80465116 0.81860465 0.82325581 0.82790698 0.80930233
|
|
0.8372093 0.81395349 0.80930233 0.80092593]
|
|
|
|
mean value: 0.8140460809646857
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.6 0.53846154 0.76923077 0.66666667 0.8
|
|
0.66666667 0.83333333 0.92307692 0.90909091]
|
|
|
|
mean value: 0.748913550217898
|
|
|
|
key: train_fscore
|
|
value: [0.79816514 0.80373832 0.81860465 0.82882883 0.83700441 0.81105991
|
|
0.84018265 0.8245614 0.8161435 0.81057269]
|
|
|
|
mean value: 0.8188861485376868
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.75 0.5 0.71428571 0.66666667 0.76923077
|
|
0.6 0.83333333 0.85714286 0.90909091]
|
|
|
|
mean value: 0.7417932067932068
|
|
|
|
key: train_precision
|
|
value: [0.78378378 0.80373832 0.81481481 0.8 0.79166667 0.8
|
|
0.82142857 0.7768595 0.78448276 0.77310924]
|
|
|
|
mean value: 0.7949883660901246
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.58333333 0.83333333 0.66666667 0.83333333
|
|
0.75 0.83333333 1. 0.90909091]
|
|
|
|
mean value: 0.7659090909090909
|
|
|
|
key: train_recall
|
|
value: [0.81308411 0.80373832 0.82242991 0.85981308 0.88785047 0.82242991
|
|
0.85981308 0.87850467 0.85046729 0.85185185]
|
|
|
|
mean value: 0.8449982692973347
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.66666667 0.5 0.75 0.66666667 0.79166667
|
|
0.625 0.83333333 0.91666667 0.91287879]
|
|
|
|
mean value: 0.7454545454545455
|
|
|
|
key: train_roc_auc
|
|
value: [0.79543094 0.80464694 0.81862236 0.82342506 0.82818449 0.8093631
|
|
0.83731395 0.81425234 0.8094929 0.80092593]
|
|
|
|
mean value: 0.814165801315334
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.42857143 0.36842105 0.625 0.5 0.66666667
|
|
0.5 0.71428571 0.85714286 0.83333333]
|
|
|
|
mean value: 0.6136278195488721
|
|
|
|
key: train_jcc
|
|
value: [0.66412214 0.671875 0.69291339 0.70769231 0.71969697 0.68217054
|
|
0.72440945 0.70149254 0.68939394 0.68148148]
|
|
|
|
mean value: 0.693524775026404
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.10902405 0.1528337 0.10040998 0.20752549 0.06414986 0.0619688
|
|
0.0615015 0.06558824 0.06363559 0.06329346]
|
|
|
|
mean value: 0.09499306678771972
|
|
|
|
key: score_time
|
|
value: [0.01049137 0.01351738 0.01741481 0.01112556 0.01131034 0.0103085
|
|
0.01125455 0.01034284 0.01033878 0.01028371]
|
|
|
|
mean value: 0.011638784408569336
|
|
|
|
key: test_mcc
|
|
value: [0.75261781 0.45834925 0.50709255 0.58536941 0.58536941 0.64168895
|
|
0.58536941 0.43033148 0.83333333 0.38932432]
|
|
|
|
mean value: 0.5768845916089698
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.70833333 0.75 0.79166667 0.79166667 0.79166667
|
|
0.79166667 0.70833333 0.91666667 0.69565217]
|
|
|
|
mean value: 0.7820652173913043
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.63157895 0.76923077 0.8 0.7826087 0.73684211
|
|
0.8 0.74074074 0.91666667 0.66666667]
|
|
|
|
mean value: 0.77138998089799
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.85714286 0.71428571 0.76923077 0.81818182 1.
|
|
0.76923077 0.66666667 0.91666667 0.7 ]
|
|
|
|
mean value: 0.8120496170496171
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.5 0.83333333 0.83333333 0.75 0.58333333
|
|
0.83333333 0.83333333 0.91666667 0.63636364]
|
|
|
|
mean value: 0.7553030303030304
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.70833333 0.75 0.79166667 0.79166667 0.79166667
|
|
0.79166667 0.70833333 0.91666667 0.69318182]
|
|
|
|
mean value: 0.7818181818181819
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.46153846 0.625 0.66666667 0.64285714 0.58333333
|
|
0.66666667 0.58823529 0.84615385 0.5 ]
|
|
|
|
mean value: 0.6349682180564533
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04819727 0.02819967 0.06439757 0.04967928 0.05256891 0.05795836
|
|
0.04971886 0.06058121 0.06324649 0.04557896]
|
|
|
|
mean value: 0.05201265811920166
|
|
|
|
key: score_time
|
|
value: [0.012532 0.01256657 0.01874089 0.02269316 0.01954985 0.02217078
|
|
0.01249123 0.02141213 0.02277303 0.01738524]
|
|
|
|
mean value: 0.01823148727416992
|
|
|
|
key: test_mcc
|
|
value: [0.66666667 0.30779351 0.16903085 0.43033148 0.16666667 0.66666667
|
|
0.50709255 0.58536941 0.83333333 0.21969697]
|
|
|
|
mean value: 0.45526481023555615
|
|
|
|
key: train_mcc
|
|
value: [0.92574643 0.96278989 0.96295976 0.95352662 0.9443531 0.94418484
|
|
0.95352662 0.96295976 0.94418484 0.97259753]
|
|
|
|
mean value: 0.9526829380391524
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.625 0.58333333 0.70833333 0.58333333 0.83333333
|
|
0.75 0.79166667 0.91666667 0.60869565]
|
|
|
|
mean value: 0.7233695652173913
|
|
|
|
key: train_accuracy
|
|
value: [0.9627907 0.98139535 0.98139535 0.97674419 0.97209302 0.97209302
|
|
0.97674419 0.98139535 0.97209302 0.98611111]
|
|
|
|
mean value: 0.9762855297157622
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.70967742 0.61538462 0.66666667 0.58333333 0.83333333
|
|
0.76923077 0.7826087 0.91666667 0.60869565]
|
|
|
|
mean value: 0.7318930485129643
|
|
|
|
key: train_fscore
|
|
value: [0.96296296 0.98130841 0.98148148 0.97652582 0.97222222 0.97196262
|
|
0.97652582 0.98148148 0.97196262 0.98630137]
|
|
|
|
mean value: 0.9762734806063463
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.57894737 0.57142857 0.77777778 0.58333333 0.83333333
|
|
0.71428571 0.81818182 0.91666667 0.58333333]
|
|
|
|
mean value: 0.7210621250094934
|
|
|
|
key: train_precision
|
|
value: [0.95412844 0.98130841 0.97247706 0.98113208 0.96330275 0.97196262
|
|
0.98113208 0.97247706 0.97196262 0.97297297]
|
|
|
|
mean value: 0.97228560898771
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.91666667 0.66666667 0.58333333 0.58333333 0.83333333
|
|
0.83333333 0.75 0.91666667 0.63636364]
|
|
|
|
mean value: 0.7553030303030304
|
|
|
|
key: train_recall
|
|
value: [0.97196262 0.98130841 0.99065421 0.97196262 0.98130841 0.97196262
|
|
0.97196262 0.99065421 0.97196262 1. ]
|
|
|
|
mean value: 0.9803738317757009
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.625 0.58333333 0.70833333 0.58333333 0.83333333
|
|
0.75 0.79166667 0.91666667 0.60984848]
|
|
|
|
mean value: 0.7234848484848485
|
|
|
|
key: train_roc_auc
|
|
value: [0.96283316 0.98139495 0.98143821 0.97672205 0.97213569 0.97209242
|
|
0.97672205 0.98143821 0.97209242 0.98611111]
|
|
|
|
mean value: 0.9762980269989616
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.55 0.44444444 0.5 0.41176471 0.71428571
|
|
0.625 0.64285714 0.84615385 0.4375 ]
|
|
|
|
mean value: 0.5886291567909215
|
|
|
|
key: train_jcc
|
|
value: [0.92857143 0.96330275 0.96363636 0.95412844 0.94594595 0.94545455
|
|
0.95412844 0.96363636 0.94545455 0.97297297]
|
|
|
|
mean value: 0.9537231798699689
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01826453 0.01130295 0.01051307 0.01082659 0.01079297 0.0103395
|
|
0.00921464 0.01058745 0.00952053 0.01054502]
|
|
|
|
mean value: 0.01119072437286377
|
|
|
|
key: score_time
|
|
value: [0.01071835 0.01040387 0.00927067 0.00956964 0.00958848 0.00956655
|
|
0.00894737 0.00974202 0.00914693 0.00972438]
|
|
|
|
mean value: 0.00966782569885254
|
|
|
|
key: test_mcc
|
|
value: [0.43033148 0.41812101 0.0860663 0.35355339 0.60246408 0.41812101
|
|
0.33333333 0.6761234 0.41812101 0.65151515]
|
|
|
|
mean value: 0.43877501498062393
|
|
|
|
key: train_mcc
|
|
value: [0.48481158 0.49495629 0.48481158 0.4922574 0.47473832 0.49351477
|
|
0.49351477 0.44598003 0.49351477 0.48685383]
|
|
|
|
mean value: 0.48449533333574646
|
|
|
|
key: test_accuracy
|
|
value: [0.70833333 0.70833333 0.54166667 0.66666667 0.79166667 0.70833333
|
|
0.66666667 0.83333333 0.70833333 0.82608696]
|
|
|
|
mean value: 0.7159420289855073
|
|
|
|
key: train_accuracy
|
|
value: [0.73953488 0.74418605 0.73953488 0.74418605 0.73488372 0.74418605
|
|
0.74418605 0.72093023 0.74418605 0.74074074]
|
|
|
|
mean value: 0.7396554694229113
|
|
|
|
key: test_fscore
|
|
value: [0.74074074 0.72 0.59259259 0.71428571 0.81481481 0.72
|
|
0.66666667 0.84615385 0.69565217 0.81818182]
|
|
|
|
mean value: 0.7329088367349237
|
|
|
|
key: train_fscore
|
|
value: [0.75652174 0.76190476 0.75652174 0.75770925 0.7510917 0.75982533
|
|
0.75982533 0.73684211 0.75982533 0.75862069]
|
|
|
|
mean value: 0.7558687971774802
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.69230769 0.53333333 0.625 0.73333333 0.69230769
|
|
0.66666667 0.78571429 0.72727273 0.81818182]
|
|
|
|
mean value: 0.6940784215784216
|
|
|
|
key: train_precision
|
|
value: [0.70731707 0.70967742 0.70731707 0.71666667 0.70491803 0.71311475
|
|
0.71311475 0.69421488 0.71311475 0.70967742]
|
|
|
|
mean value: 0.7089132822832833
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.75 0.66666667 0.83333333 0.91666667 0.75
|
|
0.66666667 0.91666667 0.66666667 0.81818182]
|
|
|
|
mean value: 0.7818181818181819
|
|
|
|
key: train_recall
|
|
value: [0.81308411 0.82242991 0.81308411 0.80373832 0.80373832 0.81308411
|
|
0.81308411 0.78504673 0.81308411 0.81481481]
|
|
|
|
mean value: 0.8095188646590515
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.70833333 0.54166667 0.66666667 0.79166667 0.70833333
|
|
0.66666667 0.83333333 0.70833333 0.82575758]
|
|
|
|
mean value: 0.7159090909090909
|
|
|
|
key: train_roc_auc
|
|
value: [0.73987539 0.74454829 0.73987539 0.74446175 0.73520249 0.74450502
|
|
0.74450502 0.72122707 0.74450502 0.74074074]
|
|
|
|
mean value: 0.739944617514711
|
|
|
|
key: test_jcc
|
|
value: [0.58823529 0.5625 0.42105263 0.55555556 0.6875 0.5625
|
|
0.5 0.73333333 0.53333333 0.69230769]
|
|
|
|
mean value: 0.5836317840226509
|
|
|
|
key: train_jcc
|
|
value: [0.60839161 0.61538462 0.60839161 0.60992908 0.6013986 0.61267606
|
|
0.61267606 0.58333333 0.61267606 0.61111111]
|
|
|
|
mean value: 0.6075968125039147
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01256156 0.01546574 0.01470971 0.01508951 0.01480317 0.01682806
|
|
0.01639032 0.01579952 0.01583982 0.01432538]
|
|
|
|
mean value: 0.015181279182434082
|
|
|
|
key: score_time
|
|
value: [0.00943828 0.01121879 0.01107645 0.01168895 0.01164103 0.01182342
|
|
0.01168585 0.01208973 0.01164865 0.01169538]
|
|
|
|
mean value: 0.011400651931762696
|
|
|
|
key: test_mcc
|
|
value: [0.64168895 0.58536941 0.30151134 0.77459667 0.50709255 0.45834925
|
|
0.43033148 0.58536941 0.77459667 0.6992059 ]
|
|
|
|
mean value: 0.5758111628033658
|
|
|
|
key: train_mcc
|
|
value: [0.69402575 0.76240636 0.62644734 0.82329526 0.84188663 0.81614982
|
|
0.81270771 0.61063847 0.74777648 0.66695469]
|
|
|
|
mean value: 0.7402288516692308
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.79166667 0.58333333 0.875 0.75 0.70833333
|
|
0.70833333 0.79166667 0.875 0.82608696]
|
|
|
|
mean value: 0.7701086956521739
|
|
|
|
key: train_accuracy
|
|
value: [0.82790698 0.86976744 0.78139535 0.91162791 0.92093023 0.90697674
|
|
0.89767442 0.77209302 0.86511628 0.81944444]
|
|
|
|
mean value: 0.8572932816537467
|
|
|
|
key: test_fscore
|
|
value: [0.82758621 0.8 0.70588235 0.85714286 0.76923077 0.63157895
|
|
0.74074074 0.7826087 0.88888889 0.84615385]
|
|
|
|
mean value: 0.7849813305015425
|
|
|
|
key: train_fscore
|
|
value: [0.85140562 0.88333333 0.81992337 0.91162791 0.92018779 0.90291262
|
|
0.90677966 0.7030303 0.87763713 0.84210526]
|
|
|
|
mean value: 0.8618943007240835
|
|
|
|
key: test_precision
|
|
value: [0.70588235 0.76923077 0.54545455 1. 0.71428571 0.85714286
|
|
0.66666667 0.81818182 0.8 0.73333333]
|
|
|
|
mean value: 0.761017805723688
|
|
|
|
key: train_precision
|
|
value: [0.74647887 0.79699248 0.69480519 0.90740741 0.9245283 0.93939394
|
|
0.82945736 1. 0.8 0.74820144]
|
|
|
|
mean value: 0.8387265001125784
|
|
|
|
key: test_recall
|
|
value: [1. 0.83333333 1. 0.75 0.83333333 0.5
|
|
0.83333333 0.75 1. 1. ]
|
|
|
|
mean value: 0.85
|
|
|
|
key: train_recall
|
|
value: [0.99065421 0.99065421 1. 0.91588785 0.91588785 0.86915888
|
|
1. 0.54205607 0.97196262 0.96296296]
|
|
|
|
mean value: 0.9159224645205953
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.79166667 0.58333333 0.875 0.75 0.70833333
|
|
0.70833333 0.79166667 0.875 0.83333333]
|
|
|
|
mean value: 0.7708333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.82866044 0.8703271 0.78240741 0.91164763 0.92090689 0.90680166
|
|
0.89814815 0.77102804 0.86561094 0.81944444]
|
|
|
|
mean value: 0.8574982692973347
|
|
|
|
key: test_jcc
|
|
value: [0.70588235 0.66666667 0.54545455 0.75 0.625 0.46153846
|
|
0.58823529 0.64285714 0.8 0.73333333]
|
|
|
|
mean value: 0.6518967796908973
|
|
|
|
key: train_jcc
|
|
value: [0.74125874 0.79104478 0.69480519 0.83760684 0.85217391 0.82300885
|
|
0.82945736 0.54205607 0.78195489 0.72727273]
|
|
|
|
mean value: 0.762063936598939
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01563263 0.01531029 0.01476598 0.01530075 0.01501727 0.01689005
|
|
0.01432395 0.01691675 0.01566124 0.01474738]
|
|
|
|
mean value: 0.015456628799438477
|
|
|
|
key: score_time
|
|
value: [0.01183009 0.0117557 0.01172447 0.01171923 0.01170135 0.01168537
|
|
0.01175404 0.011832 0.01171136 0.01169014]
|
|
|
|
mean value: 0.011740374565124511
|
|
|
|
key: test_mcc
|
|
value: [0.60246408 0.45834925 0.57735027 0.57735027 0.53033009 0.60246408
|
|
0.58536941 0.6761234 0.57735027 0.3030303 ]
|
|
|
|
mean value: 0.5490181407944392
|
|
|
|
key: train_mcc
|
|
value: [0.75414397 0.80417261 0.85658506 0.56745034 0.77988595 0.87115575
|
|
0.8191606 0.88004537 0.54792743 0.80125769]
|
|
|
|
mean value: 0.7681784775947282
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.70833333 0.75 0.75 0.75 0.79166667
|
|
0.79166667 0.83333333 0.75 0.65217391]
|
|
|
|
mean value: 0.7568840579710145
|
|
|
|
key: train_accuracy
|
|
value: [0.86511628 0.89767442 0.9255814 0.74418605 0.88372093 0.93488372
|
|
0.90697674 0.93953488 0.73023256 0.89814815]
|
|
|
|
mean value: 0.8726055124892333
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.63157895 0.8 0.66666667 0.78571429 0.76190476
|
|
0.8 0.84615385 0.8 0.63636364]
|
|
|
|
mean value: 0.749028690607638
|
|
|
|
key: train_fscore
|
|
value: [0.84491979 0.88888889 0.92920354 0.65408805 0.89270386 0.93636364
|
|
0.91150442 0.94063927 0.78676471 0.90350877]
|
|
|
|
mean value: 0.8688584936144532
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.85714286 0.66666667 1. 0.6875 0.88888889
|
|
0.76923077 0.78571429 0.66666667 0.63636364]
|
|
|
|
mean value: 0.7847062659562659
|
|
|
|
key: train_precision
|
|
value: [0.9875 0.96703297 0.88235294 1. 0.82539683 0.91150442
|
|
0.86554622 0.91964286 0.64848485 0.85833333]
|
|
|
|
mean value: 0.8865794415833458
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.5 1. 0.5 0.91666667 0.66666667
|
|
0.83333333 0.91666667 1. 0.63636364]
|
|
|
|
mean value: 0.7636363636363637
|
|
|
|
key: train_recall
|
|
value: [0.73831776 0.82242991 0.98130841 0.48598131 0.97196262 0.96261682
|
|
0.96261682 0.96261682 1. 0.9537037 ]
|
|
|
|
mean value: 0.8841554170993423
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.70833333 0.75 0.75 0.75 0.79166667
|
|
0.79166667 0.83333333 0.75 0.65151515]
|
|
|
|
mean value: 0.7568181818181818
|
|
|
|
key: train_roc_auc
|
|
value: [0.86452925 0.89732606 0.92583939 0.74299065 0.88412946 0.93501211
|
|
0.90723434 0.93964174 0.73148148 0.89814815]
|
|
|
|
mean value: 0.8726332641052268
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.46153846 0.66666667 0.5 0.64705882 0.61538462
|
|
0.66666667 0.73333333 0.66666667 0.46666667]
|
|
|
|
mean value: 0.6039366515837103
|
|
|
|
key: train_jcc
|
|
value: [0.73148148 0.8 0.8677686 0.48598131 0.80620155 0.88034188
|
|
0.83739837 0.88793103 0.64848485 0.824 ]
|
|
|
|
mean value: 0.7769589072614843
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.14447331 0.12586784 0.1261003 0.11577964 0.11632538 0.11602783
|
|
0.11646795 0.11628914 0.11623144 0.11667013]
|
|
|
|
mean value: 0.12102329730987549
|
|
|
|
key: score_time
|
|
value: [0.01626277 0.01627779 0.0151825 0.0147202 0.01485586 0.01480722
|
|
0.01476502 0.01491356 0.01466155 0.01459432]
|
|
|
|
mean value: 0.015104079246520996
|
|
|
|
key: test_mcc
|
|
value: [0.66666667 0.64168895 0.38490018 0.60246408 0.2508726 0.50709255
|
|
0.5 0.6761234 0.91986621 0.31298622]
|
|
|
|
mean value: 0.546266085923586
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99078321]
|
|
|
|
mean value: 0.999078321349667
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.79166667 0.66666667 0.79166667 0.625 0.75
|
|
0.75 0.83333333 0.95833333 0.65217391]
|
|
|
|
mean value: 0.7652173913043478
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99537037]
|
|
|
|
mean value: 0.999537037037037
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.73684211 0.73333333 0.81481481 0.60869565 0.72727273
|
|
0.75 0.81818182 0.96 0.66666667]
|
|
|
|
mean value: 0.7649140451039764
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99539171]
|
|
|
|
mean value: 0.9995391705069124
|
|
|
|
key: test_precision
|
|
value: [0.83333333 1. 0.61111111 0.73333333 0.63636364 0.8
|
|
0.75 0.9 0.92307692 0.61538462]
|
|
|
|
mean value: 0.7802602952602953
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99082569]
|
|
|
|
mean value: 0.9990825688073395
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.58333333 0.91666667 0.91666667 0.58333333 0.66666667
|
|
0.75 0.75 1. 0.72727273]
|
|
|
|
mean value: 0.7727272727272727
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.79166667 0.66666667 0.79166667 0.625 0.75
|
|
0.75 0.83333333 0.95833333 0.65530303]
|
|
|
|
mean value: 0.765530303030303
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99537037]
|
|
|
|
mean value: 0.999537037037037
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.58333333 0.57894737 0.6875 0.4375 0.57142857
|
|
0.6 0.69230769 0.92307692 0.5 ]
|
|
|
|
mean value: 0.6288379602853287
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99082569]
|
|
|
|
mean value: 0.9990825688073395
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04997015 0.04298496 0.06290412 0.04491043 0.05685115 0.05192327
|
|
0.04238701 0.04378319 0.06164503 0.05723691]
|
|
|
|
mean value: 0.05145962238311767
|
|
|
|
key: score_time
|
|
value: [0.01879859 0.02429152 0.02979851 0.01755548 0.02789521 0.01707387
|
|
0.0175848 0.01788163 0.03390718 0.02229738]
|
|
|
|
mean value: 0.022708415985107422
|
|
|
|
key: test_mcc
|
|
value: [0.75261781 0.51298918 0.41812101 0.50709255 0.60246408 0.70710678
|
|
0.50709255 0.50709255 0.83333333 0.38932432]
|
|
|
|
mean value: 0.5737234159712618
|
|
|
|
key: train_mcc
|
|
value: [0.96345091 0.94564526 0.96345091 0.95451081 0.99073994 0.97213328
|
|
0.96345091 0.99073994 0.96278989 0.96362411]
|
|
|
|
mean value: 0.967053596512949
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.70833333 0.70833333 0.75 0.79166667 0.83333333
|
|
0.75 0.75 0.91666667 0.69565217]
|
|
|
|
mean value: 0.7778985507246376
|
|
|
|
key: train_accuracy
|
|
value: [0.98139535 0.97209302 0.98139535 0.97674419 0.99534884 0.98604651
|
|
0.98139535 0.99534884 0.98139535 0.98148148]
|
|
|
|
mean value: 0.9832644272179156
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.58823529 0.72 0.72727273 0.76190476 0.8
|
|
0.72727273 0.76923077 0.91666667 0.66666667]
|
|
|
|
mean value: 0.754681483052327
|
|
|
|
key: train_fscore
|
|
value: [0.98095238 0.97115385 0.98095238 0.97607656 0.99530516 0.98591549
|
|
0.98095238 0.99530516 0.98130841 0.98113208]
|
|
|
|
mean value: 0.9829053852317808
|
|
|
|
key: test_precision
|
|
value: [0.90909091 1. 0.69230769 0.8 0.88888889 1.
|
|
0.8 0.71428571 0.91666667 0.7 ]
|
|
|
|
mean value: 0.8421239871239872
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 0.99056604
|
|
1. 1. 0.98130841 1. ]
|
|
|
|
mean value: 0.9971874448950803
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.41666667 0.75 0.66666667 0.66666667 0.66666667
|
|
0.66666667 0.83333333 0.91666667 0.63636364]
|
|
|
|
mean value: 0.7053030303030303
|
|
|
|
key: train_recall
|
|
value: [0.96261682 0.94392523 0.96261682 0.95327103 0.99065421 0.98130841
|
|
0.96261682 0.99065421 0.98130841 0.96296296]
|
|
|
|
mean value: 0.9691934925579785
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.70833333 0.70833333 0.75 0.79166667 0.83333333
|
|
0.75 0.75 0.91666667 0.69318182]
|
|
|
|
mean value: 0.7776515151515151
|
|
|
|
key: train_roc_auc
|
|
value: [0.98130841 0.97196262 0.98130841 0.97663551 0.9953271 0.98602458
|
|
0.98130841 0.9953271 0.98139495 0.98148148]
|
|
|
|
mean value: 0.9832078573901003
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.41666667 0.5625 0.57142857 0.61538462 0.66666667
|
|
0.57142857 0.625 0.84615385 0.5 ]
|
|
|
|
mean value: 0.6144459706959707
|
|
|
|
key: train_jcc
|
|
value: [0.96261682 0.94392523 0.96261682 0.95327103 0.99065421 0.97222222
|
|
0.96261682 0.99065421 0.96330275 0.96296296]
|
|
|
|
mean value: 0.9664843077665679
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04568768 0.09330058 0.08510971 0.06851339 0.06498671 0.05895758
|
|
0.06189704 0.06270814 0.06319594 0.06500459]
|
|
|
|
mean value: 0.06693613529205322
|
|
|
|
key: score_time
|
|
value: [0.0236721 0.02396679 0.02092028 0.02220488 0.02225089 0.02138257
|
|
0.02423358 0.01493979 0.021734 0.02368546]
|
|
|
|
mean value: 0.021899032592773437
|
|
|
|
key: test_mcc
|
|
value: [0.66666667 0.3380617 0. 0.41812101 0.3380617 0.66666667
|
|
0.35355339 0.43033148 0.6761234 0.47727273]
|
|
|
|
mean value: 0.4364858746680441
|
|
|
|
key: train_mcc
|
|
value: [0.99074074 0.99074074 0.99074074 0.99074074 0.99074074 0.99074074
|
|
0.99074074 0.99074074 0.98156643 0.99078321]
|
|
|
|
mean value: 0.9898275564560949
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.66666667 0.5 0.70833333 0.66666667 0.83333333
|
|
0.66666667 0.70833333 0.83333333 0.73913043]
|
|
|
|
mean value: 0.7155797101449275
|
|
|
|
key: train_accuracy
|
|
value: [0.99534884 0.99534884 0.99534884 0.99534884 0.99534884 0.99534884
|
|
0.99534884 0.99534884 0.99069767 0.99537037]
|
|
|
|
mean value: 0.9948858742463393
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.63636364 0.57142857 0.69565217 0.69230769 0.83333333
|
|
0.71428571 0.74074074 0.84615385 0.72727273]
|
|
|
|
mean value: 0.7290871769132639
|
|
|
|
key: train_fscore
|
|
value: [0.99534884 0.99534884 0.99534884 0.99534884 0.99534884 0.99534884
|
|
0.99534884 0.99534884 0.99074074 0.99539171]
|
|
|
|
mean value: 0.9948923143484284
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.7 0.5 0.72727273 0.64285714 0.83333333
|
|
0.625 0.66666667 0.78571429 0.72727273]
|
|
|
|
mean value: 0.7041450216450217
|
|
|
|
key: train_precision
|
|
value: [0.99074074 0.99074074 0.99074074 0.99074074 0.99074074 0.99074074
|
|
0.99074074 0.99074074 0.98165138 0.99082569]
|
|
|
|
mean value: 0.9898402990146109
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.58333333 0.66666667 0.66666667 0.75 0.83333333
|
|
0.83333333 0.83333333 0.91666667 0.72727273]
|
|
|
|
mean value: 0.7643939393939394
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.66666667 0.5 0.70833333 0.66666667 0.83333333
|
|
0.66666667 0.70833333 0.83333333 0.73863636]
|
|
|
|
mean value: 0.7155303030303031
|
|
|
|
key: train_roc_auc
|
|
value: [0.99537037 0.99537037 0.99537037 0.99537037 0.99537037 0.99537037
|
|
0.99537037 0.99537037 0.99074074 0.99537037]
|
|
|
|
mean value: 0.9949074074074074
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.46666667 0.4 0.53333333 0.52941176 0.71428571
|
|
0.55555556 0.58823529 0.73333333 0.57142857]
|
|
|
|
mean value: 0.5806535947712418
|
|
|
|
key: train_jcc
|
|
value: [0.99074074 0.99074074 0.99074074 0.99074074 0.99074074 0.99074074
|
|
0.99074074 0.99074074 0.98165138 0.99082569]
|
|
|
|
mean value: 0.9898402990146109
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.39761639 0.39170408 0.38538814 0.38379908 0.39135814 0.39177537
|
|
0.38714075 0.38791275 0.39024019 0.39440322]
|
|
|
|
mean value: 0.39013381004333497
|
|
|
|
key: score_time
|
|
value: [0.00975585 0.00946522 0.00950503 0.00953794 0.00963545 0.00959516
|
|
0.00949287 0.00942302 0.0098877 0.00990605]
|
|
|
|
mean value: 0.009620428085327148
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.45834925 0.35355339 0.83333333 0.6761234 0.60246408
|
|
0.66666667 0.58536941 0.75261781 0.38932432]
|
|
|
|
mean value: 0.5903171062535404
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.70833333 0.66666667 0.91666667 0.83333333 0.79166667
|
|
0.83333333 0.79166667 0.875 0.69565217]
|
|
|
|
mean value: 0.7903985507246377
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.63157895 0.71428571 0.91666667 0.84615385 0.76190476
|
|
0.83333333 0.7826087 0.88 0.66666667]
|
|
|
|
mean value: 0.7815807327683758
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.85714286 0.625 0.91666667 0.78571429 0.88888889
|
|
0.83333333 0.81818182 0.84615385 0.7 ]
|
|
|
|
mean value: 0.8089263514263514
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.83333333 0.91666667 0.91666667 0.66666667
|
|
0.83333333 0.75 0.91666667 0.63636364]
|
|
|
|
mean value: 0.771969696969697
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.70833333 0.66666667 0.91666667 0.83333333 0.79166667
|
|
0.83333333 0.79166667 0.875 0.69318182]
|
|
|
|
mean value: 0.7901515151515152
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.46153846 0.55555556 0.84615385 0.73333333 0.61538462
|
|
0.71428571 0.64285714 0.78571429 0.5 ]
|
|
|
|
mean value: 0.6497680097680097
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01991081 0.0208385 0.02130866 0.02156377 0.02402329 0.02146912
|
|
0.02118611 0.03131819 0.02325082 0.02378273]
|
|
|
|
mean value: 0.02286520004272461
|
|
|
|
key: score_time
|
|
value: [0.01243114 0.01261592 0.0170927 0.01372766 0.01431441 0.01454306
|
|
0.01456213 0.02570295 0.01408601 0.01437736]
|
|
|
|
mean value: 0.015345335006713867
|
|
|
|
key: test_mcc
|
|
value: [0.66666667 0.5 0.0860663 0.3380617 0.2508726 0.41812101
|
|
0.27500955 0.41812101 0.3380617 0.13740858]
|
|
|
|
mean value: 0.34283891128738514
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.99074074 0.98156643 0.87730631 1.
|
|
0.96346333 1. 0.9109617 0.91132238]
|
|
|
|
mean value: 0.9635360885574163
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.75 0.54166667 0.66666667 0.625 0.70833333
|
|
0.625 0.70833333 0.66666667 0.56521739]
|
|
|
|
mean value: 0.6690217391304347
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.99534884 0.99069767 0.93488372 1.
|
|
0.98139535 1. 0.95348837 0.9537037 ]
|
|
|
|
mean value: 0.9809517657192076
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.75 0.59259259 0.69230769 0.60869565 0.72
|
|
0.68965517 0.72 0.69230769 0.58333333]
|
|
|
|
mean value: 0.688222546846235
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.99534884 0.99074074 0.93859649 1.
|
|
0.98165138 1. 0.95535714 0.95575221]
|
|
|
|
mean value: 0.9817446800571425
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.75 0.53333333 0.64285714 0.63636364 0.69230769
|
|
0.58823529 0.69230769 0.64285714 0.53846154]
|
|
|
|
mean value: 0.655005680593916
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.99074074 0.98165138 0.88429752 1.
|
|
0.96396396 1. 0.91452991 0.91525424]
|
|
|
|
mean value: 0.9650437753330701
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.75 0.66666667 0.75 0.58333333 0.75
|
|
0.83333333 0.75 0.75 0.63636364]
|
|
|
|
mean value: 0.7303030303030303
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.75 0.54166667 0.66666667 0.625 0.70833333
|
|
0.625 0.70833333 0.66666667 0.56818182]
|
|
|
|
mean value: 0.6693181818181818
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.99537037 0.99074074 0.93518519 1.
|
|
0.98148148 1. 0.9537037 0.9537037 ]
|
|
|
|
mean value: 0.9810185185185185
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.6 0.42105263 0.52941176 0.4375 0.5625
|
|
0.52631579 0.5625 0.52941176 0.41176471]
|
|
|
|
mean value: 0.5294742370632464
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.99074074 0.98165138 0.88429752 1.
|
|
0.96396396 1. 0.91452991 0.91525424]
|
|
|
|
mean value: 0.9650437753330701
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02385974 0.03455305 0.03563571 0.03445697 0.03979993 0.04786658
|
|
0.05260968 0.03588843 0.04903007 0.03504395]
|
|
|
|
mean value: 0.03887441158294678
|
|
|
|
key: score_time
|
|
value: [0.02322555 0.02342844 0.02274847 0.02379513 0.02408171 0.0219028
|
|
0.0235405 0.0222435 0.02363682 0.02212644]
|
|
|
|
mean value: 0.0230729341506958
|
|
|
|
key: test_mcc
|
|
value: [0.6761234 0.60246408 0.35355339 0.41812101 0.41812101 0.3380617
|
|
0.41812101 0.58536941 0.91986621 0.56490196]
|
|
|
|
mean value: 0.5294703160678628
|
|
|
|
key: train_mcc
|
|
value: [0.84220552 0.87942529 0.91632053 0.88853311 0.88853311 0.89866654
|
|
0.89803517 0.889785 0.8608154 0.90803041]
|
|
|
|
mean value: 0.8870350069235775
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.79166667 0.66666667 0.70833333 0.70833333 0.66666667
|
|
0.70833333 0.79166667 0.95833333 0.7826087 ]
|
|
|
|
mean value: 0.7615942028985507
|
|
|
|
key: train_accuracy
|
|
value: [0.92093023 0.93953488 0.95813953 0.94418605 0.94418605 0.94883721
|
|
0.94883721 0.94418605 0.93023256 0.9537037 ]
|
|
|
|
mean value: 0.9432773471145564
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.76190476 0.71428571 0.72 0.72 0.63636364
|
|
0.72 0.8 0.96 0.76190476]
|
|
|
|
mean value: 0.7612640692640693
|
|
|
|
key: train_fscore
|
|
value: [0.92165899 0.94009217 0.95813953 0.94444444 0.94444444 0.94977169
|
|
0.94930876 0.94545455 0.93087558 0.95454545]
|
|
|
|
mean value: 0.9438735597141295
|
|
|
|
key: test_precision
|
|
value: [0.9 0.88888889 0.625 0.69230769 0.69230769 0.7
|
|
0.69230769 0.76923077 0.92307692 0.8 ]
|
|
|
|
mean value: 0.7683119658119658
|
|
|
|
key: train_precision
|
|
value: [0.90909091 0.92727273 0.9537037 0.93577982 0.93577982 0.92857143
|
|
0.93636364 0.92035398 0.91818182 0.9375 ]
|
|
|
|
mean value: 0.9302597838512632
|
|
|
|
key: test_recall
|
|
value: [0.75 0.66666667 0.83333333 0.75 0.75 0.58333333
|
|
0.75 0.83333333 1. 0.72727273]
|
|
|
|
mean value: 0.7643939393939394
|
|
|
|
key: train_recall
|
|
value: [0.93457944 0.95327103 0.96261682 0.95327103 0.95327103 0.97196262
|
|
0.96261682 0.97196262 0.94392523 0.97222222]
|
|
|
|
mean value: 0.9579698857736241
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.79166667 0.66666667 0.70833333 0.70833333 0.66666667
|
|
0.70833333 0.79166667 0.95833333 0.78030303]
|
|
|
|
mean value: 0.7613636363636364
|
|
|
|
key: train_roc_auc
|
|
value: [0.92099342 0.93959848 0.95816026 0.94422811 0.94422811 0.94894427
|
|
0.948901 0.94431464 0.93029595 0.9537037 ]
|
|
|
|
mean value: 0.9433367947386639
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.61538462 0.55555556 0.5625 0.5625 0.46666667
|
|
0.5625 0.66666667 0.92307692 0.61538462]
|
|
|
|
mean value: 0.6222542735042735
|
|
|
|
key: train_jcc
|
|
value: [0.85470085 0.88695652 0.91964286 0.89473684 0.89473684 0.90434783
|
|
0.90350877 0.89655172 0.87068966 0.91304348]
|
|
|
|
mean value: 0.8938915373381364
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.22778225 0.23624635 0.22904706 0.23772502 0.23651695 0.23989558
|
|
0.2544477 0.28176785 0.23043919 0.23300552]
|
|
|
|
mean value: 0.24068734645843506
|
|
|
|
key: score_time
|
|
value: [0.02331424 0.02260542 0.02097154 0.02278924 0.02169299 0.02110124
|
|
0.02430058 0.02264762 0.02297473 0.02027822]
|
|
|
|
mean value: 0.022267580032348633
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.66666667 0.64168895 0.41812101 0.60246408 0.53033009
|
|
0.58536941 0.6761234 0.75261781 0.65151515]
|
|
|
|
mean value: 0.6110265959870853
|
|
|
|
key: train_mcc
|
|
value: [0.74603309 0.78889274 0.7802162 0.75049973 0.76032494 0.77022946
|
|
0.77897523 0.76032494 0.76032494 0.77992042]
|
|
|
|
mean value: 0.767574167245235
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.83333333 0.79166667 0.70833333 0.79166667 0.75
|
|
0.79166667 0.83333333 0.875 0.82608696]
|
|
|
|
mean value: 0.7992753623188406
|
|
|
|
key: train_accuracy
|
|
value: [0.86976744 0.89302326 0.88837209 0.8744186 0.87906977 0.88372093
|
|
0.88837209 0.87906977 0.87906977 0.88888889]
|
|
|
|
mean value: 0.8823772609819122
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.83333333 0.82758621 0.72 0.81481481 0.7
|
|
0.8 0.84615385 0.88 0.81818182]
|
|
|
|
mean value: 0.8022678715032538
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_rt.py:114: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_rt.py:117: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.87719298 0.89686099 0.89285714 0.87782805 0.88288288 0.88789238
|
|
0.89189189 0.88288288 0.88288288 0.89285714]
|
|
|
|
mean value: 0.8866029226238309
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.83333333 0.70588235 0.69230769 0.73333333 0.875
|
|
0.76923077 0.78571429 0.84615385 0.81818182]
|
|
|
|
mean value: 0.7877319249378073
|
|
|
|
key: train_precision
|
|
value: [0.82644628 0.86206897 0.85470085 0.85087719 0.85217391 0.85344828
|
|
0.86086957 0.85217391 0.85217391 0.86206897]
|
|
|
|
mean value: 0.8527001839919424
|
|
|
|
key: test_recall
|
|
value: [0.75 0.83333333 1. 0.75 0.91666667 0.58333333
|
|
0.83333333 0.91666667 0.91666667 0.81818182]
|
|
|
|
mean value: 0.8318181818181818
|
|
|
|
key: train_recall
|
|
value: [0.93457944 0.93457944 0.93457944 0.90654206 0.91588785 0.92523364
|
|
0.92523364 0.91588785 0.91588785 0.92592593]
|
|
|
|
mean value: 0.9234337140879196
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.83333333 0.79166667 0.70833333 0.79166667 0.75
|
|
0.79166667 0.83333333 0.875 0.82575758]
|
|
|
|
mean value: 0.7992424242424243
|
|
|
|
key: train_roc_auc
|
|
value: [0.8700675 0.89321565 0.88858602 0.87456732 0.87924022 0.88391312
|
|
0.88854275 0.87924022 0.87924022 0.88888889]
|
|
|
|
mean value: 0.8825501903772932
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.71428571 0.70588235 0.5625 0.6875 0.53846154
|
|
0.66666667 0.73333333 0.78571429 0.69230769]
|
|
|
|
mean value: 0.672950872656755
|
|
|
|
key: train_jcc
|
|
value: [0.78125 0.81300813 0.80645161 0.78225806 0.79032258 0.7983871
|
|
0.80487805 0.79032258 0.79032258 0.80645161]
|
|
|
|
mean value: 0.7963652307894047
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03132892 0.03188396 0.03114724 0.02774787 0.0322659 0.0272336
|
|
0.02989817 0.0314157 0.03231025 0.03273916]
|
|
|
|
mean value: 0.030797076225280762
|
|
|
|
key: score_time
|
|
value: [0.01346755 0.01181531 0.01179171 0.01184106 0.01183009 0.01181269
|
|
0.01179075 0.01178718 0.01394463 0.01460361]
|
|
|
|
mean value: 0.012468457221984863
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.6761234 0.64168895 0.6761234 0.60246408 0.60246408
|
|
0.5 0.58536941 0.83333333 0.58536941]
|
|
|
|
mean value: 0.6288305461987017
|
|
|
|
key: train_mcc
|
|
value: [0.75261781 0.82495863 0.79684302 0.78788184 0.82495863 0.77898084
|
|
0.81537425 0.79973188 0.77898084 0.80642024]
|
|
|
|
mean value: 0.7966747979587403
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.83333333 0.79166667 0.83333333 0.79166667 0.79166667
|
|
0.75 0.79166667 0.91666667 0.79166667]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.875 0.91203704 0.89814815 0.89351852 0.91203704 0.88888889
|
|
0.90740741 0.89814815 0.88888889 0.90277778]
|
|
|
|
mean value: 0.8976851851851851
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.81818182 0.82758621 0.84615385 0.81481481 0.76190476
|
|
0.75 0.8 0.91666667 0.7826087 ]
|
|
|
|
mean value: 0.8100525505922808
|
|
|
|
key: train_fscore
|
|
value: [0.88 0.91402715 0.9 0.8959276 0.91402715 0.89189189
|
|
0.90909091 0.90265487 0.89189189 0.90497738]
|
|
|
|
mean value: 0.9004488836149429
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.9 0.70588235 0.78571429 0.73333333 0.88888889
|
|
0.75 0.76923077 0.91666667 0.81818182]
|
|
|
|
mean value: 0.8086079933138757
|
|
|
|
key: train_precision
|
|
value: [0.84615385 0.89380531 0.88392857 0.87610619 0.89380531 0.86842105
|
|
0.89285714 0.86440678 0.86842105 0.88495575]
|
|
|
|
mean value: 0.8772861011735417
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 1. 0.91666667 0.91666667 0.66666667
|
|
0.75 0.83333333 0.91666667 0.75 ]
|
|
|
|
mean value: 0.825
|
|
|
|
key: train_recall
|
|
value: [0.91666667 0.93518519 0.91666667 0.91666667 0.93518519 0.91666667
|
|
0.92592593 0.94444444 0.91666667 0.92592593]
|
|
|
|
mean value: 0.925
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.83333333 0.79166667 0.83333333 0.79166667 0.79166667
|
|
0.75 0.79166667 0.91666667 0.79166667]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.875 0.91203704 0.89814815 0.89351852 0.91203704 0.88888889
|
|
0.90740741 0.89814815 0.88888889 0.90277778]
|
|
|
|
mean value: 0.8976851851851851
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.69230769 0.70588235 0.73333333 0.6875 0.61538462
|
|
0.6 0.66666667 0.84615385 0.64285714]
|
|
|
|
mean value: 0.6832942792501616
|
|
|
|
key: train_jcc
|
|
value: [0.78571429 0.84166667 0.81818182 0.81147541 0.84166667 0.80487805
|
|
0.83333333 0.82258065 0.80487805 0.82644628]
|
|
|
|
mean value: 0.8190821204112838
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.87871289 0.72661591 0.72426248 0.82515478 0.74664068 0.73621082
|
|
1.05823541 0.73661423 0.84614754 0.7338798 ]
|
|
|
|
mean value: 0.8012474536895752
|
|
|
|
key: score_time
|
|
value: [0.01202941 0.01206493 0.01203799 0.01216793 0.01202607 0.0142746
|
|
0.01201224 0.01200318 0.0119803 0.01208901]
|
|
|
|
mean value: 0.012268567085266113
|
|
|
|
key: test_mcc
|
|
value: [0.75261781 0.6761234 0.64168895 0.6761234 0.53033009 0.50709255
|
|
0.5 0.58536941 0.6761234 0.66666667]
|
|
|
|
mean value: 0.621213568067322
|
|
|
|
key: train_mcc
|
|
value: [0.74393663 0.77898084 0.77120096 0.82495863 0.74188651 0.94444444
|
|
0.77992042 0.77120096 0.75158034 0.74188651]
|
|
|
|
mean value: 0.7849996266114585
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.83333333 0.79166667 0.83333333 0.75 0.75
|
|
0.75 0.79166667 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8041666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.87037037 0.88888889 0.88425926 0.91203704 0.87037037 0.97222222
|
|
0.88888889 0.88425926 0.875 0.87037037]
|
|
|
|
mean value: 0.8916666666666666
|
|
|
|
key: test_fscore
|
|
value: [0.88 0.81818182 0.82758621 0.84615385 0.78571429 0.72727273
|
|
0.75 0.8 0.84615385 0.83333333]
|
|
|
|
mean value: 0.8114396063706408
|
|
|
|
key: train_fscore
|
|
value: [0.87610619 0.89189189 0.88888889 0.91402715 0.87387387 0.97222222
|
|
0.89285714 0.88888889 0.87892377 0.87387387]
|
|
|
|
mean value: 0.8951553893324458
|
|
|
|
key: test_precision
|
|
value: [0.84615385 0.9 0.70588235 0.78571429 0.6875 0.8
|
|
0.75 0.76923077 0.78571429 0.83333333]
|
|
|
|
mean value: 0.7863528873087697
|
|
|
|
key: train_precision
|
|
value: [0.83898305 0.86842105 0.85470085 0.89380531 0.85087719 0.97222222
|
|
0.86206897 0.85470085 0.85217391 0.85087719]
|
|
|
|
mean value: 0.8698830609363113
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.75 1. 0.91666667 0.91666667 0.66666667
|
|
0.75 0.83333333 0.91666667 0.83333333]
|
|
|
|
mean value: 0.85
|
|
|
|
key: train_recall
|
|
value: [0.91666667 0.91666667 0.92592593 0.93518519 0.89814815 0.97222222
|
|
0.92592593 0.92592593 0.90740741 0.89814815]
|
|
|
|
mean value: 0.9222222222222223
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.83333333 0.79166667 0.83333333 0.75 0.75
|
|
0.75 0.79166667 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8041666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.87037037 0.88888889 0.88425926 0.91203704 0.87037037 0.97222222
|
|
0.88888889 0.88425926 0.875 0.87037037]
|
|
|
|
mean value: 0.8916666666666666
|
|
|
|
key: test_jcc
|
|
value: [0.78571429 0.69230769 0.70588235 0.73333333 0.64705882 0.57142857
|
|
0.6 0.66666667 0.73333333 0.71428571]
|
|
|
|
mean value: 0.6850010773540185
|
|
|
|
key: train_jcc
|
|
value: [0.77952756 0.80487805 0.8 0.84166667 0.776 0.94594595
|
|
0.80645161 0.8 0.784 0.776 ]
|
|
|
|
mean value: 0.8114469833351444
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01747465 0.01148367 0.00916314 0.00904512 0.00902748 0.00901794
|
|
0.00899577 0.00941372 0.00913596 0.0089817 ]
|
|
|
|
mean value: 0.010173916816711426
|
|
|
|
key: score_time
|
|
value: [0.01298499 0.00921249 0.00894976 0.00869322 0.00875592 0.00881815
|
|
0.00888896 0.00878215 0.00871611 0.00874591]
|
|
|
|
mean value: 0.009254765510559083
|
|
|
|
key: test_mcc
|
|
value: [ 0.38490018 0.43033148 -0.2236068 0.35355339 0.64168895 0.50709255
|
|
0.27500955 0.70710678 0.60246408 0.6761234 ]
|
|
|
|
mean value: 0.4354663566126372
|
|
|
|
key: train_mcc
|
|
value: [0.50639215 0.4859637 0.54289671 0.5002895 0.49104638 0.51437268
|
|
0.52235132 0.48653363 0.49041703 0.51478398]
|
|
|
|
mean value: 0.5055047079924628
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.70833333 0.41666667 0.66666667 0.79166667 0.75
|
|
0.625 0.83333333 0.79166667 0.83333333]
|
|
|
|
mean value: 0.7083333333333334
|
|
|
|
key: train_accuracy
|
|
value: [0.73611111 0.71296296 0.75925926 0.74537037 0.73148148 0.74074074
|
|
0.74537037 0.72685185 0.72685185 0.73611111]
|
|
|
|
mean value: 0.7361111111111112
|
|
|
|
key: test_fscore
|
|
value: [0.73333333 0.74074074 0.5625 0.71428571 0.82758621 0.76923077
|
|
0.68965517 0.85714286 0.81481481 0.84615385]
|
|
|
|
mean value: 0.755544345501242
|
|
|
|
key: train_fscore
|
|
value: [0.77647059 0.76865672 0.79032258 0.76793249 0.76984127 0.77952756
|
|
0.7826087 0.76862745 0.77042802 0.77992278]
|
|
|
|
mean value: 0.7754338145765779
|
|
|
|
key: test_precision
|
|
value: [0.61111111 0.66666667 0.45 0.625 0.70588235 0.71428571
|
|
0.58823529 0.75 0.73333333 0.78571429]
|
|
|
|
mean value: 0.6630228758169935
|
|
|
|
key: train_precision
|
|
value: [0.67346939 0.64375 0.7 0.70542636 0.67361111 0.67808219
|
|
0.68275862 0.66666667 0.66442953 0.66887417]
|
|
|
|
mean value: 0.6757068036979277
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.83333333 0.75 0.83333333 1. 0.83333333
|
|
0.83333333 1. 0.91666667 0.91666667]
|
|
|
|
mean value: 0.8833333333333333
|
|
|
|
key: train_recall
|
|
value: [0.91666667 0.9537037 0.90740741 0.84259259 0.89814815 0.91666667
|
|
0.91666667 0.90740741 0.91666667 0.93518519]
|
|
|
|
mean value: 0.9111111111111111
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.70833333 0.41666667 0.66666667 0.79166667 0.75
|
|
0.625 0.83333333 0.79166667 0.83333333]
|
|
|
|
mean value: 0.7083333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.73611111 0.71296296 0.75925926 0.74537037 0.73148148 0.74074074
|
|
0.74537037 0.72685185 0.72685185 0.73611111]
|
|
|
|
mean value: 0.7361111111111112
|
|
|
|
key: test_jcc
|
|
value: [0.57894737 0.58823529 0.39130435 0.55555556 0.70588235 0.625
|
|
0.52631579 0.75 0.6875 0.73333333]
|
|
|
|
mean value: 0.6142074041668536
|
|
|
|
key: train_jcc
|
|
value: [0.63461538 0.62424242 0.65333333 0.62328767 0.62580645 0.63870968
|
|
0.64285714 0.62420382 0.62658228 0.63924051]
|
|
|
|
mean value: 0.6332878691779598
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00945187 0.00936913 0.00931382 0.00924897 0.00935626 0.0092473
|
|
0.00924015 0.00930476 0.00929523 0.00913787]
|
|
|
|
mean value: 0.009296536445617676
|
|
|
|
key: score_time
|
|
value: [0.00871253 0.00882363 0.00878596 0.00875378 0.00881147 0.00875854
|
|
0.00878882 0.00885057 0.00869274 0.00871849]
|
|
|
|
mean value: 0.008769655227661132
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.35355339 0. 0.70710678 0.33333333 0.58536941
|
|
0.25819889 0.58536941 0.84515425 0.83333333]
|
|
|
|
mean value: 0.5086788203937056
|
|
|
|
key: train_mcc
|
|
value: [0.61205637 0.62060985 0.65743559 0.64993368 0.66222239 0.62039697
|
|
0.66712438 0.64695398 0.62253572 0.58760578]
|
|
|
|
mean value: 0.6346874717823057
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.66666667 0.5 0.83333333 0.66666667 0.79166667
|
|
0.625 0.79166667 0.91666667 0.91666667]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_accuracy
|
|
value: [0.80555556 0.81018519 0.8287037 0.82407407 0.8287037 0.81018519
|
|
0.83333333 0.81944444 0.81018519 0.79166667]
|
|
|
|
mean value: 0.8162037037037038
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.6 0.57142857 0.85714286 0.66666667 0.8
|
|
0.66666667 0.7826087 0.92307692 0.91666667]
|
|
|
|
mean value: 0.7566865742952699
|
|
|
|
key: train_fscore
|
|
value: [0.81081081 0.81278539 0.82949309 0.83035714 0.83842795 0.81105991
|
|
0.83636364 0.83261803 0.81777778 0.80349345]
|
|
|
|
mean value: 0.8223187174459913
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.75 0.5 0.75 0.66666667 0.76923077
|
|
0.6 0.81818182 0.85714286 0.91666667]
|
|
|
|
mean value: 0.7446070596070596
|
|
|
|
key: train_precision
|
|
value: [0.78947368 0.8018018 0.82568807 0.80172414 0.79338843 0.80733945
|
|
0.82142857 0.776 0.78632479 0.76033058]
|
|
|
|
mean value: 0.7963499512896963
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.66666667 1. 0.66666667 0.83333333
|
|
0.75 0.75 1. 0.91666667]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: train_recall
|
|
value: [0.83333333 0.82407407 0.83333333 0.86111111 0.88888889 0.81481481
|
|
0.85185185 0.89814815 0.85185185 0.85185185]
|
|
|
|
mean value: 0.850925925925926
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.66666667 0.5 0.83333333 0.66666667 0.79166667
|
|
0.625 0.79166667 0.91666667 0.91666667]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_roc_auc
|
|
value: [0.80555556 0.81018519 0.8287037 0.82407407 0.8287037 0.81018519
|
|
0.83333333 0.81944444 0.81018519 0.79166667]
|
|
|
|
mean value: 0.8162037037037037
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.42857143 0.4 0.75 0.5 0.66666667
|
|
0.5 0.64285714 0.85714286 0.84615385]
|
|
|
|
mean value: 0.6234249084249084
|
|
|
|
key: train_jcc
|
|
value: [0.68181818 0.68461538 0.70866142 0.70992366 0.72180451 0.68217054
|
|
0.71875 0.71323529 0.69172932 0.67153285]
|
|
|
|
mean value: 0.6984241165933639
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00914621 0.00876808 0.00858617 0.00959158 0.00878024 0.00912333
|
|
0.0090847 0.00912189 0.00884724 0.00896358]
|
|
|
|
mean value: 0.00900130271911621
|
|
|
|
key: score_time
|
|
value: [0.01012707 0.01494837 0.00995064 0.01463628 0.01452112 0.01011086
|
|
0.01094484 0.01011419 0.0106566 0.01070809]
|
|
|
|
mean value: 0.011671805381774902
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.58536941 0. 0.33333333 0.3380617 0.83333333
|
|
0.0860663 0.25819889 0.6761234 0.50709255]
|
|
|
|
mean value: 0.42029483255174716
|
|
|
|
key: train_mcc
|
|
value: [0.62491409 0.59763515 0.62491409 0.65970203 0.59628479 0.57034259
|
|
0.636655 0.59763515 0.63355259 0.60625994]
|
|
|
|
mean value: 0.6147895420546894
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.79166667 0.5 0.66666667 0.66666667 0.91666667
|
|
0.54166667 0.625 0.83333333 0.75 ]
|
|
|
|
mean value: 0.7083333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.81018519 0.7962963 0.81018519 0.8287037 0.7962963 0.78240741
|
|
0.81481481 0.7962963 0.81481481 0.80092593]
|
|
|
|
mean value: 0.8050925925925926
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.8 0.57142857 0.66666667 0.69230769 0.91666667
|
|
0.59259259 0.66666667 0.84615385 0.72727273]
|
|
|
|
mean value: 0.7262364125407603
|
|
|
|
key: train_fscore
|
|
value: [0.8209607 0.80869565 0.8209607 0.83555556 0.80701754 0.7965368
|
|
0.82758621 0.80869565 0.8245614 0.81222707]
|
|
|
|
mean value: 0.8162797282320872
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.76923077 0.5 0.66666667 0.64285714 0.91666667
|
|
0.53333333 0.6 0.78571429 0.8 ]
|
|
|
|
mean value: 0.7032650682650683
|
|
|
|
key: train_precision
|
|
value: [0.7768595 0.76229508 0.7768595 0.8034188 0.76666667 0.74796748
|
|
0.77419355 0.76229508 0.78333333 0.76859504]
|
|
|
|
mean value: 0.7722484045001899
|
|
|
|
key: test_recall
|
|
value: [0.75 0.83333333 0.66666667 0.66666667 0.75 0.91666667
|
|
0.66666667 0.75 0.91666667 0.66666667]
|
|
|
|
mean value: 0.7583333333333333
|
|
|
|
key: train_recall
|
|
value: [0.87037037 0.86111111 0.87037037 0.87037037 0.85185185 0.85185185
|
|
0.88888889 0.86111111 0.87037037 0.86111111]
|
|
|
|
mean value: 0.8657407407407407
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.79166667 0.5 0.66666667 0.66666667 0.91666667
|
|
0.54166667 0.625 0.83333333 0.75 ]
|
|
|
|
mean value: 0.7083333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.81018519 0.7962963 0.81018519 0.8287037 0.7962963 0.78240741
|
|
0.81481481 0.7962963 0.81481481 0.80092593]
|
|
|
|
mean value: 0.8050925925925926
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.66666667 0.4 0.5 0.52941176 0.84615385
|
|
0.42105263 0.5 0.73333333 0.57142857]
|
|
|
|
mean value: 0.581090395672439
|
|
|
|
key: train_jcc
|
|
value: [0.6962963 0.67883212 0.6962963 0.71755725 0.67647059 0.6618705
|
|
0.70588235 0.67883212 0.70149254 0.68382353]
|
|
|
|
mean value: 0.6897353589576423
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01296568 0.01241398 0.01232123 0.01224113 0.01185298 0.01195812
|
|
0.01187444 0.01185417 0.0118258 0.01191759]
|
|
|
|
mean value: 0.012122511863708496
|
|
|
|
key: score_time
|
|
value: [0.00993228 0.0095787 0.01052666 0.00957179 0.00952911 0.00951576
|
|
0.00952315 0.00964808 0.00979733 0.00964403]
|
|
|
|
mean value: 0.009726691246032714
|
|
|
|
key: test_mcc
|
|
value: [0.75261781 0.66666667 0.35355339 0.50709255 0.43033148 0.58536941
|
|
0.50709255 0.6761234 0.58536941 0.75261781]
|
|
|
|
mean value: 0.5816834481651599
|
|
|
|
key: train_mcc
|
|
value: [0.78439613 0.77822 0.79115136 0.77992042 0.83562902 0.78439613
|
|
0.80307223 0.74704394 0.78262379 0.7741473 ]
|
|
|
|
mean value: 0.7860600330933626
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.83333333 0.66666667 0.75 0.70833333 0.79166667
|
|
0.75 0.83333333 0.79166667 0.875 ]
|
|
|
|
mean value: 0.7875
|
|
|
|
key: train_accuracy
|
|
value: [0.88888889 0.88425926 0.89351852 0.88888889 0.91666667 0.88888889
|
|
0.89814815 0.87037037 0.88888889 0.88425926]
|
|
|
|
mean value: 0.8902777777777777
|
|
|
|
key: test_fscore
|
|
value: [0.88 0.83333333 0.71428571 0.76923077 0.74074074 0.7826087
|
|
0.76923077 0.84615385 0.8 0.88 ]
|
|
|
|
mean value: 0.8015583868627346
|
|
|
|
key: train_fscore
|
|
value: [0.89565217 0.89270386 0.89867841 0.89285714 0.91964286 0.89565217
|
|
0.90434783 0.87826087 0.89473684 0.89082969]
|
|
|
|
mean value: 0.8963361856664529
|
|
|
|
key: test_precision
|
|
value: [0.84615385 0.83333333 0.625 0.71428571 0.66666667 0.81818182
|
|
0.71428571 0.78571429 0.76923077 0.84615385]
|
|
|
|
mean value: 0.7619005994005994
|
|
|
|
key: train_precision
|
|
value: [0.8442623 0.832 0.85714286 0.86206897 0.88793103 0.8442623
|
|
0.85245902 0.82786885 0.85 0.84297521]
|
|
|
|
mean value: 0.8500970522770821
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.83333333 0.83333333 0.83333333 0.83333333 0.75
|
|
0.83333333 0.91666667 0.83333333 0.91666667]
|
|
|
|
mean value: 0.85
|
|
|
|
key: train_recall
|
|
value: [0.9537037 0.96296296 0.94444444 0.92592593 0.9537037 0.9537037
|
|
0.96296296 0.93518519 0.94444444 0.94444444]
|
|
|
|
mean value: 0.9481481481481482
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.83333333 0.66666667 0.75 0.70833333 0.79166667
|
|
0.75 0.83333333 0.79166667 0.875 ]
|
|
|
|
mean value: 0.7875000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [0.88888889 0.88425926 0.89351852 0.88888889 0.91666667 0.88888889
|
|
0.89814815 0.87037037 0.88888889 0.88425926]
|
|
|
|
mean value: 0.8902777777777777
|
|
|
|
key: test_jcc
|
|
value: [0.78571429 0.71428571 0.55555556 0.625 0.58823529 0.64285714
|
|
0.625 0.73333333 0.66666667 0.78571429]
|
|
|
|
mean value: 0.6722362278244631
|
|
|
|
key: train_jcc
|
|
value: [0.81102362 0.80620155 0.816 0.80645161 0.85123967 0.81102362
|
|
0.82539683 0.78294574 0.80952381 0.80314961]
|
|
|
|
mean value: 0.8122956054460755
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.55233645 0.88785172 0.81239462 1.05234766 1.17555523 0.62090945
|
|
1.19066715 0.98777461 1.14500237 0.67176127]
|
|
|
|
mean value: 0.9096600532531738
|
|
|
|
key: score_time
|
|
value: [0.01228666 0.01233935 0.01228571 0.01497912 0.01464343 0.01230478
|
|
0.01451349 0.01451325 0.01454258 0.01224256]
|
|
|
|
mean value: 0.013465094566345214
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.45834925 0.45834925 0.75261781 0.58536941 0.60246408
|
|
0.5 0.5 0.91986621 0.50709255]
|
|
|
|
mean value: 0.586947795996614
|
|
|
|
key: train_mcc
|
|
value: [0.87966734 0.94460643 0.91670596 0.93618901 0.98164982 0.88148312
|
|
0.95374459 0.93618901 0.96296296 0.89818665]
|
|
|
|
mean value: 0.9291384891935454
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.70833333 0.70833333 0.875 0.79166667 0.79166667
|
|
0.75 0.75 0.95833333 0.75 ]
|
|
|
|
mean value: 0.7875
|
|
|
|
key: train_accuracy
|
|
value: [0.93981481 0.97222222 0.95833333 0.96759259 0.99074074 0.93981481
|
|
0.97685185 0.96759259 0.98148148 0.94907407]
|
|
|
|
mean value: 0.9643518518518518
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.63157895 0.75862069 0.86956522 0.7826087 0.76190476
|
|
0.75 0.75 0.95652174 0.72727273]
|
|
|
|
mean value: 0.7770681474027169
|
|
|
|
key: train_fscore
|
|
value: [0.93953488 0.97247706 0.95852535 0.96682464 0.99065421 0.93779904
|
|
0.97674419 0.96682464 0.98148148 0.94883721]
|
|
|
|
mean value: 0.9639702708162756
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.85714286 0.64705882 0.90909091 0.81818182 0.88888889
|
|
0.75 0.75 1. 0.8 ]
|
|
|
|
mean value: 0.8238545115015703
|
|
|
|
key: train_precision
|
|
value: [0.94392523 0.96363636 0.95412844 0.99029126 1. 0.97029703
|
|
0.98130841 0.99029126 0.98148148 0.95327103]
|
|
|
|
mean value: 0.9728630512356828
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.91666667 0.83333333 0.75 0.66666667
|
|
0.75 0.75 0.91666667 0.66666667]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_recall
|
|
value: [0.93518519 0.98148148 0.96296296 0.94444444 0.98148148 0.90740741
|
|
0.97222222 0.94444444 0.98148148 0.94444444]
|
|
|
|
mean value: 0.9555555555555556
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.70833333 0.70833333 0.875 0.79166667 0.79166667
|
|
0.75 0.75 0.95833333 0.75 ]
|
|
|
|
mean value: 0.7875
|
|
|
|
key: train_roc_auc
|
|
value: [0.93981481 0.97222222 0.95833333 0.96759259 0.99074074 0.93981481
|
|
0.97685185 0.96759259 0.98148148 0.94907407]
|
|
|
|
mean value: 0.9643518518518518
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.46153846 0.61111111 0.76923077 0.64285714 0.61538462
|
|
0.6 0.6 0.91666667 0.57142857]
|
|
|
|
mean value: 0.6431074481074481
|
|
|
|
key: train_jcc
|
|
value: [0.88596491 0.94642857 0.92035398 0.93577982 0.98148148 0.88288288
|
|
0.95454545 0.93577982 0.96363636 0.90265487]
|
|
|
|
mean value: 0.9309508148840501
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02154303 0.01629329 0.01646686 0.01569819 0.01544809 0.0164423
|
|
0.01606488 0.01601863 0.01652622 0.01666164]
|
|
|
|
mean value: 0.016716313362121583
|
|
|
|
key: score_time
|
|
value: [0.01181388 0.00914931 0.00873971 0.00877476 0.00904584 0.00866413
|
|
0.00874805 0.0089674 0.00875044 0.00878358]
|
|
|
|
mean value: 0.009143710136413574
|
|
|
|
key: test_mcc
|
|
value: [0.84515425 0.53033009 0.1767767 0.50709255 0.1767767 0.41812101
|
|
0.5 0.2508726 0.50709255 0.58536941]
|
|
|
|
mean value: 0.4497585851896557
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91666667 0.75 0.58333333 0.75 0.58333333 0.70833333
|
|
0.75 0.625 0.75 0.79166667]
|
|
|
|
mean value: 0.7208333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.92307692 0.7 0.64285714 0.76923077 0.64285714 0.69565217
|
|
0.75 0.64 0.76923077 0.7826087 ]
|
|
|
|
mean value: 0.7315513616817965
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.875 0.5625 0.71428571 0.5625 0.72727273
|
|
0.75 0.61538462 0.71428571 0.81818182]
|
|
|
|
mean value: 0.7196553446553446
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.58333333 0.75 0.83333333 0.75 0.66666667
|
|
0.75 0.66666667 0.83333333 0.75 ]
|
|
|
|
mean value: 0.7583333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.91666667 0.75 0.58333333 0.75 0.58333333 0.70833333
|
|
0.75 0.625 0.75 0.79166667]
|
|
|
|
mean value: 0.7208333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.85714286 0.53846154 0.47368421 0.625 0.47368421 0.53333333
|
|
0.6 0.47058824 0.625 0.64285714]
|
|
|
|
mean value: 0.5839751528141621
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.1001339 0.10060334 0.09702349 0.09846067 0.09874797 0.09993148
|
|
0.09776998 0.09760618 0.09800601 0.09977889]
|
|
|
|
mean value: 0.09880619049072266
|
|
|
|
key: score_time
|
|
value: [0.0177145 0.0176568 0.01760507 0.01874804 0.01777411 0.01774359
|
|
0.01760697 0.01760936 0.01760936 0.01758695]
|
|
|
|
mean value: 0.01776547431945801
|
|
|
|
key: test_mcc
|
|
value: [0.66666667 0.43033148 0.50709255 0.6761234 0.41812101 0.75261781
|
|
0.41812101 0.43033148 0.84515425 0.3380617 ]
|
|
|
|
mean value: 0.5482621364743856
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.70833333 0.75 0.83333333 0.70833333 0.875
|
|
0.70833333 0.70833333 0.91666667 0.66666667]
|
|
|
|
mean value: 0.7708333333333334
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.66666667 0.76923077 0.84615385 0.72 0.86956522
|
|
0.72 0.74074074 0.92307692 0.69230769]
|
|
|
|
mean value: 0.7781075188901275
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.77777778 0.71428571 0.78571429 0.69230769 0.90909091
|
|
0.69230769 0.66666667 0.85714286 0.64285714]
|
|
|
|
mean value: 0.7571484071484071
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.58333333 0.83333333 0.91666667 0.75 0.83333333
|
|
0.75 0.83333333 1. 0.75 ]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.70833333 0.75 0.83333333 0.70833333 0.875
|
|
0.70833333 0.70833333 0.91666667 0.66666667]
|
|
|
|
mean value: 0.7708333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.5 0.625 0.73333333 0.5625 0.76923077
|
|
0.5625 0.58823529 0.85714286 0.52941176]
|
|
|
|
mean value: 0.6441639732816203
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01113224 0.01055098 0.00930738 0.00918555 0.00923061 0.00927019
|
|
0.00917935 0.0091207 0.00938368 0.00915384]
|
|
|
|
mean value: 0.009551453590393066
|
|
|
|
key: score_time
|
|
value: [0.00952339 0.00961161 0.00871277 0.00885367 0.00872374 0.00861049
|
|
0.00865412 0.00864911 0.00867414 0.00875235]
|
|
|
|
mean value: 0.008876538276672364
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.45834925 0.25819889 0.16903085 0. 0.41812101
|
|
0.0836242 0.1767767 0.70710678 0.43033148]
|
|
|
|
mean value: 0.3286908561611308
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.70833333 0.625 0.58333333 0.5 0.70833333
|
|
0.54166667 0.58333333 0.83333333 0.70833333]
|
|
|
|
mean value: 0.6583333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.63157895 0.57142857 0.54545455 0.5 0.72
|
|
0.56 0.64285714 0.8 0.66666667]
|
|
|
|
mean value: 0.6420594569427521
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.85714286 0.66666667 0.6 0.5 0.69230769
|
|
0.53846154 0.5625 1. 0.77777778]
|
|
|
|
mean value: 0.7013038350538351
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.5 0.5 0.5 0.75
|
|
0.58333333 0.75 0.66666667 0.58333333]
|
|
|
|
mean value: 0.6083333333333334
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.70833333 0.625 0.58333333 0.5 0.70833333
|
|
0.54166667 0.58333333 0.83333333 0.70833333]
|
|
|
|
mean value: 0.6583333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.46153846 0.4 0.375 0.33333333 0.5625
|
|
0.38888889 0.47368421 0.66666667 0.5 ]
|
|
|
|
mean value: 0.4804468703810809
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.32348752 1.30932379 1.30970073 1.32442284 1.31778431 1.31542468
|
|
1.30667329 1.300565 1.34182906 1.36433816]
|
|
|
|
mean value: 1.3213549375534057
|
|
|
|
key: score_time
|
|
value: [0.09575081 0.09006882 0.09617686 0.09046817 0.09082484 0.0902133
|
|
0.09001493 0.09058738 0.09592247 0.09773922]
|
|
|
|
mean value: 0.09277667999267578
|
|
|
|
key: test_mcc
|
|
value: [0.75261781 0.64168895 0.60246408 0.75261781 0.75261781 0.64168895
|
|
0.33333333 0.43033148 0.75261781 0.41812101]
|
|
|
|
mean value: 0.6078099029190546
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.79166667 0.79166667 0.875 0.875 0.79166667
|
|
0.66666667 0.70833333 0.875 0.70833333]
|
|
|
|
mean value: 0.7958333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.73684211 0.81481481 0.88 0.88 0.73684211
|
|
0.66666667 0.74074074 0.88 0.69565217]
|
|
|
|
mean value: 0.7901123824052886
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.90909091 1. 0.73333333 0.84615385 0.84615385 1.
|
|
0.66666667 0.66666667 0.84615385 0.72727273]
|
|
|
|
mean value: 0.8241491841491841
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.58333333 0.91666667 0.91666667 0.91666667 0.58333333
|
|
0.66666667 0.83333333 0.91666667 0.66666667]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.79166667 0.79166667 0.875 0.875 0.79166667
|
|
0.66666667 0.70833333 0.875 0.70833333]
|
|
|
|
mean value: 0.7958333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.58333333 0.6875 0.78571429 0.78571429 0.58333333
|
|
0.5 0.58823529 0.78571429 0.53333333]
|
|
|
|
mean value: 0.6602108920491273
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.89206052 0.91959095 0.96954322 0.89371514 0.94958425 0.91306782
|
|
0.93570518 0.96002221 0.9497087 0.88737702]
|
|
|
|
mean value: 0.9270375013351441
|
|
|
|
key: score_time
|
|
value: [0.20388103 0.21231103 0.21701646 0.13943863 0.23853755 0.251302
|
|
0.21599674 0.22050405 0.18904161 0.17836046]
|
|
|
|
mean value: 0.20663895606994628
|
|
|
|
key: test_mcc
|
|
value: [0.75261781 0.53033009 0.70710678 0.75261781 0.6761234 0.64168895
|
|
0.33333333 0.50709255 0.75261781 0.41812101]
|
|
|
|
mean value: 0.6071649536972153
|
|
|
|
key: train_mcc
|
|
value: [0.90756304 0.89849486 0.89818665 0.90803041 0.93554619 0.88904134
|
|
0.90756304 0.92608473 0.90803041 0.92608473]
|
|
|
|
mean value: 0.910462541219685
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.75 0.83333333 0.875 0.83333333 0.79166667
|
|
0.66666667 0.75 0.875 0.70833333]
|
|
|
|
mean value: 0.7958333333333334
|
|
|
|
key: train_accuracy
|
|
value: [0.9537037 0.94907407 0.94907407 0.9537037 0.96759259 0.94444444
|
|
0.9537037 0.96296296 0.9537037 0.96296296]
|
|
|
|
mean value: 0.9550925925925926
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.7 0.85714286 0.88 0.84615385 0.73684211
|
|
0.66666667 0.76923077 0.86956522 0.69565217]
|
|
|
|
mean value: 0.7890818853152949
|
|
|
|
key: train_fscore
|
|
value: [0.95412844 0.94977169 0.94930876 0.95454545 0.96803653 0.94495413
|
|
0.95412844 0.96330275 0.95454545 0.96330275]
|
|
|
|
mean value: 0.9556024397790828
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.875 0.75 0.84615385 0.78571429 1.
|
|
0.66666667 0.71428571 0.90909091 0.72727273]
|
|
|
|
mean value: 0.8183275058275058
|
|
|
|
key: train_precision /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
value: [0.94545455 0.93693694 0.94495413 0.9375 0.95495495 0.93636364
|
|
0.94545455 0.95454545 0.9375 0.95454545]
|
|
|
|
mean value: 0.9448209656695895
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.58333333 1. 0.91666667 0.91666667 0.58333333
|
|
0.66666667 0.83333333 0.83333333 0.66666667]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: train_recall
|
|
value: [0.96296296 0.96296296 0.9537037 0.97222222 0.98148148 0.9537037
|
|
0.96296296 0.97222222 0.97222222 0.97222222]
|
|
|
|
mean value: 0.9666666666666667
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.75 0.83333333 0.875 0.83333333 0.79166667
|
|
0.66666667 0.75 0.875 0.70833333]
|
|
|
|
mean value: 0.7958333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.9537037 0.94907407 0.94907407 0.9537037 0.96759259 0.94444444
|
|
0.9537037 0.96296296 0.9537037 0.96296296]
|
|
|
|
mean value: 0.9550925925925926
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.53846154 0.75 0.78571429 0.73333333 0.58333333
|
|
0.5 0.625 0.76923077 0.53333333]
|
|
|
|
mean value: 0.6587637362637363
|
|
|
|
key: train_jcc
|
|
value: [0.9122807 0.90434783 0.90350877 0.91304348 0.9380531 0.89565217
|
|
0.9122807 0.92920354 0.91304348 0.92920354]
|
|
|
|
mean value: 0.9150617308951486
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02310514 0.0093317 0.00927734 0.00936031 0.01038909 0.01009369
|
|
0.01033616 0.01032877 0.01018953 0.00961852]
|
|
|
|
mean value: 0.01120302677154541
|
|
|
|
key: score_time
|
|
value: [0.01308703 0.00966859 0.0088737 0.0088706 0.00949836 0.00903583
|
|
0.00962281 0.00909829 0.00949168 0.00966215]
|
|
|
|
mean value: 0.00969090461730957
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.35355339 0. 0.70710678 0.33333333 0.58536941
|
|
0.25819889 0.58536941 0.84515425 0.83333333]
|
|
|
|
mean value: 0.5086788203937056
|
|
|
|
key: train_mcc
|
|
value: [0.61205637 0.62060985 0.65743559 0.64993368 0.66222239 0.62039697
|
|
0.66712438 0.64695398 0.62253572 0.58760578]
|
|
|
|
mean value: 0.6346874717823057
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.66666667 0.5 0.83333333 0.66666667 0.79166667
|
|
0.625 0.79166667 0.91666667 0.91666667]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_accuracy
|
|
value: [0.80555556 0.81018519 0.8287037 0.82407407 0.8287037 0.81018519
|
|
0.83333333 0.81944444 0.81018519 0.79166667]
|
|
|
|
mean value: 0.8162037037037038
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.6 0.57142857 0.85714286 0.66666667 0.8
|
|
0.66666667 0.7826087 0.92307692 0.91666667]
|
|
|
|
mean value: 0.7566865742952699
|
|
|
|
key: train_fscore
|
|
value: [0.81081081 0.81278539 0.82949309 0.83035714 0.83842795 0.81105991
|
|
0.83636364 0.83261803 0.81777778 0.80349345]
|
|
|
|
mean value: 0.8223187174459913
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.75 0.5 0.75 0.66666667 0.76923077
|
|
0.6 0.81818182 0.85714286 0.91666667]
|
|
|
|
mean value: 0.7446070596070596
|
|
|
|
key: train_precision
|
|
value: [0.78947368 0.8018018 0.82568807 0.80172414 0.79338843 0.80733945
|
|
0.82142857 0.776 0.78632479 0.76033058]
|
|
|
|
mean value: 0.7963499512896963
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.66666667 1. 0.66666667 0.83333333
|
|
0.75 0.75 1. 0.91666667]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: train_recall
|
|
value: [0.83333333 0.82407407 0.83333333 0.86111111 0.88888889 0.81481481
|
|
0.85185185 0.89814815 0.85185185 0.85185185]
|
|
|
|
mean value: 0.850925925925926
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.66666667 0.5 0.83333333 0.66666667 0.79166667
|
|
0.625 0.79166667 0.91666667 0.91666667]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_roc_auc
|
|
value: [0.80555556 0.81018519 0.8287037 0.82407407 0.8287037 0.81018519
|
|
0.83333333 0.81944444 0.81018519 0.79166667]
|
|
|
|
mean value: 0.8162037037037037
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.42857143 0.4 0.75 0.5 0.66666667
|
|
0.5 0.64285714 0.85714286 0.84615385]
|
|
|
|
mean value: 0.6234249084249084
|
|
|
|
key: train_jcc
|
|
value: [0.68181818 0.68461538 0.70866142 0.70992366 0.72180451 0.68217054
|
|
0.71875 0.71323529 0.69172932 0.67153285]
|
|
|
|
mean value: 0.6984241165933639
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.11020756 0.05813694 0.06567621 0.06768131 0.06942606 0.06736112
|
|
0.06523728 0.06494617 0.06630445 0.07134366]
|
|
|
|
mean value: 0.07063207626342774
|
|
|
|
key: score_time
|
|
value: [0.01092362 0.01058197 0.01053071 0.01055431 0.01047707 0.01060176
|
|
0.01060224 0.01061916 0.01055455 0.01064134]
|
|
|
|
mean value: 0.010608673095703125
|
|
|
|
key: test_mcc
|
|
value: [0.83333333 0.38490018 0.57735027 0.6761234 0.58536941 0.53033009
|
|
0.58536941 0.2508726 0.75261781 0.33333333]
|
|
|
|
mean value: 0.5509599831007203
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91666667 0.66666667 0.75 0.83333333 0.79166667 0.75
|
|
0.79166667 0.625 0.875 0.66666667]
|
|
|
|
mean value: 0.7666666666666666
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.91666667 0.55555556 0.8 0.84615385 0.7826087 0.7
|
|
0.7826087 0.64 0.88 0.66666667]
|
|
|
|
mean value: 0.7570260126347083
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.91666667 0.83333333 0.66666667 0.78571429 0.81818182 0.875
|
|
0.81818182 0.61538462 0.84615385 0.66666667]
|
|
|
|
mean value: 0.7841949716949717
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.41666667 1. 0.91666667 0.75 0.58333333
|
|
0.75 0.66666667 0.91666667 0.66666667]
|
|
|
|
mean value: 0.7583333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.91666667 0.66666667 0.75 0.83333333 0.79166667 0.75
|
|
0.79166667 0.625 0.875 0.66666667]
|
|
|
|
mean value: 0.7666666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.84615385 0.38461538 0.66666667 0.73333333 0.64285714 0.53846154
|
|
0.64285714 0.47058824 0.78571429 0.5 ]
|
|
|
|
mean value: 0.6211247575953458
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03139472 0.07590818 0.06847668 0.03701425 0.06345892 0.07317209
|
|
0.05685735 0.05946326 0.0596664 0.0610683 ]
|
|
|
|
mean value: 0.05864801406860352
|
|
|
|
key: score_time
|
|
value: [0.02168989 0.02412105 0.01233745 0.01214147 0.02543545 0.01350594
|
|
0.01780462 0.02308083 0.02142167 0.02297115]
|
|
|
|
mean value: 0.019450950622558593
|
|
|
|
key: test_mcc
|
|
value: [0.66666667 0.30779351 0.16903085 0.41812101 0.16666667 0.83333333
|
|
0.50709255 0.58536941 0.75261781 0.2508726 ]
|
|
|
|
mean value: 0.4657564400092044
|
|
|
|
key: train_mcc
|
|
value: [0.92608473 0.96296296 0.94444444 0.96296296 0.94460643 0.94444444
|
|
0.95374459 0.96312812 0.95374459 0.97259753]
|
|
|
|
mean value: 0.9528720801015835
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.625 0.58333333 0.70833333 0.58333333 0.91666667
|
|
0.75 0.79166667 0.875 0.625 ]
|
|
|
|
mean value: 0.7291666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.96296296 0.98148148 0.97222222 0.98148148 0.97222222 0.97222222
|
|
0.97685185 0.98148148 0.97685185 0.98611111]
|
|
|
|
mean value: 0.9763888888888889
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.70967742 0.61538462 0.72 0.58333333 0.91666667
|
|
0.76923077 0.7826087 0.86956522 0.64 ]
|
|
|
|
mean value: 0.7439800050347034
|
|
|
|
key: train_fscore
|
|
value: [0.96330275 0.98148148 0.97222222 0.98148148 0.97247706 0.97222222
|
|
0.97674419 0.98165138 0.97695853 0.98630137]
|
|
|
|
mean value: 0.9764842681323106
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.57894737 0.57142857 0.69230769 0.58333333 0.91666667
|
|
0.71428571 0.81818182 0.90909091 0.61538462]
|
|
|
|
mean value: 0.7232960022433707
|
|
|
|
key: train_precision
|
|
value: [0.95454545 0.98148148 0.97222222 0.98148148 0.96363636 0.97222222
|
|
0.98130841 0.97272727 0.97247706 0.97297297]
|
|
|
|
mean value: 0.9725074946724608
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.91666667 0.66666667 0.75 0.58333333 0.91666667
|
|
0.83333333 0.75 0.83333333 0.66666667]
|
|
|
|
mean value: 0.775
|
|
|
|
key: train_recall
|
|
value: [0.97222222 0.98148148 0.97222222 0.98148148 0.98148148 0.97222222
|
|
0.97222222 0.99074074 0.98148148 1. ]
|
|
|
|
mean value: 0.9805555555555555
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.625 0.58333333 0.70833333 0.58333333 0.91666667
|
|
0.75 0.79166667 0.875 0.625 ]
|
|
|
|
mean value: 0.7291666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [0.96296296 0.98148148 0.97222222 0.98148148 0.97222222 0.97222222
|
|
0.97685185 0.98148148 0.97685185 0.98611111]
|
|
|
|
mean value: 0.9763888888888889
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.55 0.44444444 0.5625 0.41176471 0.84615385
|
|
0.625 0.64285714 0.76923077 0.47058824]
|
|
|
|
mean value: 0.6036824858148387
|
|
|
|
key: train_jcc
|
|
value: [0.92920354 0.96363636 0.94594595 0.96363636 0.94642857 0.94594595
|
|
0.95454545 0.96396396 0.95495495 0.97297297]
|
|
|
|
mean value: 0.9541234076853546
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01248264 0.01069927 0.01052046 0.01032162 0.01016784 0.01024556
|
|
0.01046252 0.01028323 0.01026583 0.01017547]
|
|
|
|
mean value: 0.010562443733215332
|
|
|
|
key: score_time
|
|
value: [0.00969648 0.01000333 0.00978112 0.00962114 0.00963092 0.00957775
|
|
0.0096333 0.0095582 0.00949764 0.00952911]
|
|
|
|
mean value: 0.00965290069580078
|
|
|
|
key: test_mcc
|
|
value: [ 0.43033148 0.41812101 -0.0860663 0.45834925 0.60246408 0.41812101
|
|
0.50709255 0.58536941 0.41812101 0.66666667]
|
|
|
|
mean value: 0.44185701524397397
|
|
|
|
key: train_mcc
|
|
value: [0.48685383 0.48685383 0.49693566 0.49554356 0.47684381 0.47568087
|
|
0.47568087 0.46812868 0.49554356 0.49693566]
|
|
|
|
mean value: 0.48550003388620916
|
|
|
|
key: test_accuracy
|
|
value: [0.70833333 0.70833333 0.45833333 0.70833333 0.79166667 0.70833333
|
|
0.75 0.79166667 0.70833333 0.83333333]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.74074074 0.74074074 0.74537037 0.74537037 0.73611111 0.73611111
|
|
0.73611111 0.73148148 0.74537037 0.74537037]
|
|
|
|
mean value: 0.7402777777777778
|
|
|
|
key: test_fscore
|
|
value: [0.74074074 0.72 0.51851852 0.75862069 0.81481481 0.72
|
|
0.76923077 0.8 0.69565217 0.83333333]
|
|
|
|
mean value: 0.7370911040206393
|
|
|
|
key: train_fscore
|
|
value: [0.75862069 0.75862069 0.7639485 0.76190476 0.75324675 0.7510917
|
|
0.7510917 0.75 0.76190476 0.7639485 ]
|
|
|
|
mean value: 0.7574378058188314
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.69230769 0.46666667 0.64705882 0.73333333 0.69230769
|
|
0.71428571 0.76923077 0.72727273 0.83333333]
|
|
|
|
mean value: 0.6942463418934007
|
|
|
|
key: train_precision
|
|
value: [0.70967742 0.70967742 0.712 0.71544715 0.70731707 0.7107438
|
|
0.7107438 0.7016129 0.71544715 0.712 ]
|
|
|
|
mean value: 0.710466672735509
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.75 0.58333333 0.91666667 0.91666667 0.75
|
|
0.83333333 0.83333333 0.66666667 0.83333333]
|
|
|
|
mean value: 0.7916666666666666
|
|
|
|
key: train_recall
|
|
value: [0.81481481 0.81481481 0.82407407 0.81481481 0.80555556 0.7962963
|
|
0.7962963 0.80555556 0.81481481 0.82407407]
|
|
|
|
mean value: 0.8111111111111111
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.70833333 0.45833333 0.70833333 0.79166667 0.70833333
|
|
0.75 0.79166667 0.70833333 0.83333333]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.74074074 0.74074074 0.74537037 0.74537037 0.73611111 0.73611111
|
|
0.73611111 0.73148148 0.74537037 0.74537037]
|
|
|
|
mean value: 0.7402777777777779
|
|
|
|
key: test_jcc
|
|
value: [0.58823529 0.5625 0.35 0.61111111 0.6875 0.5625
|
|
0.625 0.66666667 0.53333333 0.71428571]
|
|
|
|
mean value: 0.5901132119514473
|
|
|
|
key: train_jcc
|
|
value: [0.61111111 0.61111111 0.61805556 0.61538462 0.60416667 0.6013986
|
|
0.6013986 0.6 0.61538462 0.61805556]
|
|
|
|
mean value: 0.6096066433566434
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01346827 0.01688552 0.01641726 0.01505566 0.01531744 0.0147779
|
|
0.0166719 0.01709533 0.01546025 0.0159111 ]
|
|
|
|
mean value: 0.01570606231689453
|
|
|
|
key: score_time
|
|
value: [0.00915766 0.0112474 0.01120234 0.01165509 0.01171255 0.01177716
|
|
0.01172066 0.01178312 0.01171827 0.01173067]
|
|
|
|
mean value: 0.011370491981506348
|
|
|
|
key: test_mcc
|
|
value: [0.6761234 0.53033009 0.33333333 0.57735027 0.53033009 0.45834925
|
|
0.53033009 0.58536941 0.37796447 0.58536941]
|
|
|
|
mean value: 0.5184849799508764
|
|
|
|
key: train_mcc
|
|
value: [0.74307085 0.87996919 0.78656204 0.64888568 0.79848995 0.77831178
|
|
0.74779086 0.80836728 0.60587838 0.79473968]
|
|
|
|
mean value: 0.7592065697400981
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.75 0.66666667 0.75 0.75 0.70833333
|
|
0.75 0.79166667 0.625 0.79166667]
|
|
|
|
mean value: 0.7416666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.86574074 0.93981481 0.88425926 0.7962963 0.89814815 0.88888889
|
|
0.86111111 0.90277778 0.76851852 0.89351852]
|
|
|
|
mean value: 0.8699074074074074
|
|
|
|
key: test_fscore
|
|
value: [0.84615385 0.7 0.66666667 0.8 0.78571429 0.63157895
|
|
0.7 0.8 0.4 0.8 ]
|
|
|
|
mean value: 0.7130113745903219
|
|
|
|
key: train_fscore
|
|
value: [0.87659574 0.94063927 0.87046632 0.83076923 0.90178571 0.88679245
|
|
0.84042553 0.89855072 0.69879518 0.9004329 ]
|
|
|
|
mean value: 0.8645253070924268
|
|
|
|
key: test_precision
|
|
value: [0.78571429 0.875 0.66666667 0.66666667 0.6875 0.85714286
|
|
0.875 0.76923077 1. 0.76923077]
|
|
|
|
mean value: 0.7952152014652014
|
|
|
|
key: train_precision
|
|
value: [0.81102362 0.92792793 0.98823529 0.71052632 0.87068966 0.90384615
|
|
0.9875 0.93939394 1. 0.84552846]
|
|
|
|
mean value: 0.8984671363579353
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.58333333 0.66666667 1. 0.91666667 0.5
|
|
0.58333333 0.83333333 0.25 0.83333333]
|
|
|
|
mean value: 0.7083333333333334
|
|
|
|
key: train_recall
|
|
value: [0.9537037 0.9537037 0.77777778 1. 0.93518519 0.87037037
|
|
0.73148148 0.86111111 0.53703704 0.96296296]
|
|
|
|
mean value: 0.8583333333333334
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.75 0.66666667 0.75 0.75 0.70833333
|
|
0.75 0.79166667 0.625 0.79166667]
|
|
|
|
mean value: 0.7416666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.86574074 0.93981481 0.88425926 0.7962963 0.89814815 0.88888889
|
|
0.86111111 0.90277778 0.76851852 0.89351852]
|
|
|
|
mean value: 0.8699074074074074
|
|
|
|
key: test_jcc
|
|
value: [0.73333333 0.53846154 0.5 0.66666667 0.64705882 0.46153846
|
|
0.53846154 0.66666667 0.25 0.66666667]
|
|
|
|
mean value: 0.5668853695324283
|
|
|
|
key: train_jcc
|
|
value: [0.78030303 0.88793103 0.7706422 0.71052632 0.82113821 0.79661017
|
|
0.72477064 0.81578947 0.53703704 0.81889764]
|
|
|
|
mean value: 0.7663645754002122
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0160954 0.01449347 0.01418686 0.01509547 0.01532292 0.01373005
|
|
0.01380444 0.0151577 0.01477861 0.01720095]
|
|
|
|
mean value: 0.014986586570739747
|
|
|
|
key: score_time
|
|
value: [0.01187539 0.01171732 0.01166487 0.01170754 0.01174474 0.01170206
|
|
0.01171851 0.01169991 0.01186538 0.01181006]
|
|
|
|
mean value: 0.011750578880310059
|
|
|
|
key: test_mcc
|
|
value: [0.66666667 0.58536941 0.58536941 0.30151134 0.3380617 0.4472136
|
|
0.4472136 0.58536941 0.37796447 0.3380617 ]
|
|
|
|
mean value: 0.4672801300051277
|
|
|
|
key: train_mcc
|
|
value: [0.84553359 0.62733435 0.80125769 0.46940279 0.85243671 0.5487044
|
|
0.51225071 0.83333333 0.53452248 0.80235109]
|
|
|
|
mean value: 0.6827127149571861
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.79166667 0.79166667 0.58333333 0.66666667 0.66666667
|
|
0.66666667 0.79166667 0.625 0.66666667]
|
|
|
|
mean value: 0.7083333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.9212963 0.78240741 0.89814815 0.68055556 0.92592593 0.73611111
|
|
0.71296296 0.91666667 0.72222222 0.89351852]
|
|
|
|
mean value: 0.8189814814814815
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.8 0.8 0.28571429 0.69230769 0.5
|
|
0.5 0.8 0.4 0.69230769]
|
|
|
|
mean value: 0.6303663003663004
|
|
|
|
key: train_fscore
|
|
value: [0.92444444 0.82129278 0.89215686 0.53061224 0.92727273 0.64596273
|
|
0.6025641 0.91666667 0.61538462 0.90295359]
|
|
|
|
mean value: 0.7779310759058158
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.76923077 0.76923077 1. 0.64285714 1.
|
|
1. 0.76923077 1. 0.64285714]
|
|
|
|
mean value: 0.8426739926739927
|
|
|
|
key: train_precision
|
|
value: [0.88888889 0.69677419 0.94791667 1. 0.91071429 0.98113208
|
|
0.97916667 0.91666667 1. 0.82945736]
|
|
|
|
mean value: 0.9150716807964345
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.83333333 0.83333333 0.16666667 0.75 0.33333333
|
|
0.33333333 0.83333333 0.25 0.75 ]
|
|
|
|
mean value: 0.5916666666666667
|
|
|
|
key: train_recall
|
|
value: [0.96296296 1. 0.84259259 0.36111111 0.94444444 0.48148148
|
|
0.43518519 0.91666667 0.44444444 0.99074074]
|
|
|
|
mean value: 0.7379629629629629
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.79166667 0.79166667 0.58333333 0.66666667 0.66666667
|
|
0.66666667 0.79166667 0.625 0.66666667]
|
|
|
|
mean value: 0.7083333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.9212963 0.78240741 0.89814815 0.68055556 0.92592593 0.73611111
|
|
0.71296296 0.91666667 0.72222222 0.89351852]
|
|
|
|
mean value: 0.8189814814814814
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.66666667 0.66666667 0.16666667 0.52941176 0.33333333
|
|
0.33333333 0.66666667 0.25 0.52941176]
|
|
|
|
mean value: 0.48564425770308123
|
|
|
|
key: train_jcc
|
|
value: [0.85950413 0.69677419 0.80530973 0.36111111 0.86440678 0.47706422
|
|
0.43119266 0.84615385 0.44444444 0.82307692]
|
|
|
|
mean value: 0.6609038045474354
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.1361053 0.12321711 0.11989808 0.12128091 0.12607265 0.12655473
|
|
0.12733555 0.12603474 0.12223935 0.12803626]
|
|
|
|
mean value: 0.1256774663925171
|
|
|
|
key: score_time
|
|
value: [0.01632547 0.01506424 0.01524639 0.01524496 0.01628733 0.01632857
|
|
0.01587772 0.01606822 0.01655674 0.01658249]
|
|
|
|
mean value: 0.015958213806152345
|
|
|
|
key: test_mcc
|
|
value: [0.66666667 0.57735027 0.70710678 0.75261781 0.2508726 0.50709255
|
|
0.41812101 0.41812101 1. 0.41812101]
|
|
|
|
mean value: 0.5716069696899095
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 0.99078321 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.999078321349667
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.75 0.83333333 0.875 0.625 0.75
|
|
0.70833333 0.70833333 1. 0.70833333]
|
|
|
|
mean value: 0.7791666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 0.99537037 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.999537037037037
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.66666667 0.85714286 0.88 0.60869565 0.72727273
|
|
0.72 0.69565217 1. 0.72 ]
|
|
|
|
mean value: 0.770876341050254
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 0.99539171 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9995391705069124
|
|
|
|
key: test_precision
|
|
value: [0.83333333 1. 0.75 0.84615385 0.63636364 0.8
|
|
0.69230769 0.72727273 1. 0.69230769]
|
|
|
|
mean value: 0.7977738927738928
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 0.99082569 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9990825688073395
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.5 1. 0.91666667 0.58333333 0.66666667
|
|
0.75 0.66666667 1. 0.75 ]
|
|
|
|
mean value: 0.7666666666666666
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.75 0.83333333 0.875 0.625 0.75
|
|
0.70833333 0.70833333 1. 0.70833333]
|
|
|
|
mean value: 0.7791666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 0.99537037 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.999537037037037
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.5 0.75 0.78571429 0.4375 0.57142857
|
|
0.5625 0.53333333 1. 0.5625 ]
|
|
|
|
mean value: 0.6417261904761905
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 0.99082569 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9990825688073395
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05251479 0.05213046 0.04939222 0.04378223 0.05401325 0.05549884
|
|
0.04612184 0.05807853 0.06026721 0.06066704]
|
|
|
|
mean value: 0.05324664115905762
|
|
|
|
key: score_time
|
|
value: [0.02272248 0.03480697 0.02512097 0.02819991 0.03404307 0.02167606
|
|
0.03094029 0.03069401 0.04057026 0.09532309]
|
|
|
|
mean value: 0.03640971183776855
|
|
|
|
key: test_mcc
|
|
value: [0.83333333 0.38490018 0.60246408 0.50709255 0.43033148 0.53033009
|
|
0.50709255 0.60246408 0.91986621 0.75261781]
|
|
|
|
mean value: 0.607049235943675
|
|
|
|
key: train_mcc
|
|
value: [0.98164982 0.9459053 0.9459053 0.95407186 0.99078321 0.97259753
|
|
0.96362411 0.98164982 0.95407186 0.98148148]
|
|
|
|
mean value: 0.9671740288105546
|
|
|
|
key: test_accuracy
|
|
value: [0.91666667 0.66666667 0.79166667 0.75 0.70833333 0.75
|
|
0.75 0.79166667 0.95833333 0.875 ]
|
|
|
|
mean value: 0.7958333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.99074074 0.97222222 0.97222222 0.97685185 0.99537037 0.98611111
|
|
0.98148148 0.99074074 0.97685185 0.99074074]
|
|
|
|
mean value: 0.9833333333333333
|
|
|
|
key: test_fscore
|
|
value: [0.91666667 0.55555556 0.81481481 0.72727273 0.66666667 0.7
|
|
0.72727273 0.76190476 0.95652174 0.86956522]
|
|
|
|
mean value: 0.769624087667566
|
|
|
|
key: train_fscore
|
|
value: [0.99065421 0.97142857 0.97142857 0.97652582 0.99534884 0.98591549
|
|
0.98113208 0.99065421 0.97652582 0.99074074]
|
|
|
|
mean value: 0.9830354343644072
|
|
|
|
key: test_precision
|
|
value: [0.91666667 0.83333333 0.73333333 0.8 0.77777778 0.875
|
|
0.8 0.88888889 1. 0.90909091]
|
|
|
|
mean value: 0.8534090909090909
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 0.99047619 1. 1.
|
|
1. 1. 0.99047619 0.99074074]
|
|
|
|
mean value: 0.9971693121693121
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.41666667 0.91666667 0.66666667 0.58333333 0.58333333
|
|
0.66666667 0.66666667 0.91666667 0.83333333]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_recall
|
|
value: [0.98148148 0.94444444 0.94444444 0.96296296 0.99074074 0.97222222
|
|
0.96296296 0.98148148 0.96296296 0.99074074]
|
|
|
|
mean value: 0.9694444444444444
|
|
|
|
key: test_roc_auc
|
|
value: [0.91666667 0.66666667 0.79166667 0.75 0.70833333 0.75
|
|
0.75 0.79166667 0.95833333 0.875 ]
|
|
|
|
mean value: 0.7958333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.99074074 0.97222222 0.97222222 0.97685185 0.99537037 0.98611111
|
|
0.98148148 0.99074074 0.97685185 0.99074074]
|
|
|
|
mean value: 0.9833333333333333
|
|
|
|
key: test_jcc
|
|
value: [0.84615385 0.38461538 0.6875 0.57142857 0.5 0.53846154
|
|
0.57142857 0.61538462 0.91666667 0.76923077]
|
|
|
|
mean value: 0.6400869963369963
|
|
|
|
key: train_jcc
|
|
value: [0.98148148 0.94444444 0.94444444 0.95412844 0.99074074 0.97222222
|
|
0.96296296 0.98148148 0.95412844 0.98165138]
|
|
|
|
mean value: 0.9667686034658511
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0322156 0.02812076 0.05939436 0.04106569 0.02669883 0.02819943
|
|
0.06717181 0.06473112 0.06225467 0.054461 ]
|
|
|
|
mean value: 0.046431326866149904
|
|
|
|
key: score_time
|
|
value: [0.01301074 0.01293302 0.02412224 0.01273417 0.01269698 0.01300836
|
|
0.0239048 0.01927137 0.02405953 0.01356697]
|
|
|
|
mean value: 0.016930818557739258
|
|
|
|
key: test_mcc
|
|
value: [0.75261781 0.3380617 0. 0.58536941 0.3380617 0.58536941
|
|
0.25819889 0.43033148 0.6761234 0.5 ]
|
|
|
|
mean value: 0.44641338032410316
|
|
|
|
key: train_mcc
|
|
value: [0.98164982 0.98164982 0.98164982 0.99078321 0.99078321 0.98164982
|
|
0.98164982 0.98164982 0.98164982 0.98164982]
|
|
|
|
mean value: 0.9834764964705683
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.66666667 0.5 0.79166667 0.66666667 0.79166667
|
|
0.625 0.70833333 0.83333333 0.75 ]
|
|
|
|
mean value: 0.7208333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.99074074 0.99074074 0.99074074 0.99537037 0.99537037 0.99074074
|
|
0.99074074 0.99074074 0.99074074 0.99074074]
|
|
|
|
mean value: 0.9916666666666666
|
|
|
|
key: test_fscore
|
|
value: [0.88 0.63636364 0.57142857 0.8 0.69230769 0.8
|
|
0.66666667 0.74074074 0.84615385 0.75 ]
|
|
|
|
mean value: 0.7383661153661154
|
|
|
|
key: train_fscore
|
|
value: [0.99082569 0.99082569 0.99082569 0.99539171 0.99539171 0.99082569
|
|
0.99082569 0.99082569 0.99082569 0.99082569]
|
|
|
|
mean value: 0.9917388914725405
|
|
|
|
key: test_precision
|
|
value: [0.84615385 0.7 0.5 0.76923077 0.64285714 0.76923077
|
|
0.6 0.66666667 0.78571429 0.75 ]
|
|
|
|
mean value: 0.702985347985348
|
|
|
|
key: train_precision
|
|
value: [0.98181818 0.98181818 0.98181818 0.99082569 0.99082569 0.98181818
|
|
0.98181818 0.98181818 0.98181818 0.98181818]
|
|
|
|
mean value: 0.9836196830692243
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.58333333 0.66666667 0.83333333 0.75 0.83333333
|
|
0.75 0.83333333 0.91666667 0.75 ]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.66666667 0.5 0.79166667 0.66666667 0.79166667
|
|
0.625 0.70833333 0.83333333 0.75 ]
|
|
|
|
mean value: 0.7208333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.99074074 0.99074074 0.99074074 0.99537037 0.99537037 0.99074074
|
|
0.99074074 0.99074074 0.99074074 0.99074074]
|
|
|
|
mean value: 0.9916666666666667
|
|
|
|
key: test_jcc
|
|
value: [0.78571429 0.46666667 0.4 0.66666667 0.52941176 0.66666667
|
|
0.5 0.58823529 0.73333333 0.6 ]
|
|
|
|
mean value: 0.5936694677871148
|
|
|
|
key: train_jcc
|
|
value: [0.98181818 0.98181818 0.98181818 0.99082569 0.99082569 0.98181818
|
|
0.98181818 0.98181818 0.98181818 0.98181818]
|
|
|
|
mean value: 0.9836196830692243
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.40719414 0.38963819 0.38802338 0.38530183 0.39083529 0.38973022
|
|
0.3902452 0.38925004 0.38730001 0.3895514 ]
|
|
|
|
mean value: 0.39070696830749513
|
|
|
|
key: score_time
|
|
value: [0.00941658 0.00929379 0.00939155 0.00930834 0.00956917 0.00928354
|
|
0.00918388 0.00929189 0.00934386 0.00926805]
|
|
|
|
mean value: 0.009335064888000488
|
|
|
|
key: test_mcc
|
|
value: [0.66666667 0.45834925 0.45834925 0.84515425 0.6761234 0.77459667
|
|
0.50709255 0.58536941 0.75261781 0.41812101]
|
|
|
|
mean value: 0.6142440265299691
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.70833333 0.70833333 0.91666667 0.83333333 0.875
|
|
0.75 0.79166667 0.875 0.70833333]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.63157895 0.75862069 0.92307692 0.84615385 0.85714286
|
|
0.72727273 0.8 0.88 0.69565217]
|
|
|
|
mean value: 0.7952831497916324
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.85714286 0.64705882 0.85714286 0.78571429 1.
|
|
0.8 0.76923077 0.84615385 0.72727273]
|
|
|
|
mean value: 0.8123049499520087
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.5 0.91666667 1. 0.91666667 0.75
|
|
0.66666667 0.83333333 0.91666667 0.66666667]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.70833333 0.70833333 0.91666667 0.83333333 0.875
|
|
0.75 0.79166667 0.875 0.70833333]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.46153846 0.61111111 0.85714286 0.73333333 0.75
|
|
0.57142857 0.66666667 0.78571429 0.53333333]
|
|
|
|
mean value: 0.6684554334554335
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02204514 0.02083945 0.02080059 0.02074742 0.03952312 0.02238369
|
|
0.06109738 0.03359938 0.02302122 0.02167463]
|
|
|
|
mean value: 0.028573203086853027
|
|
|
|
key: score_time
|
|
value: [0.0126133 0.01318598 0.01951098 0.01508093 0.0224371 0.01442599
|
|
0.01425624 0.02035761 0.01714063 0.01932621]
|
|
|
|
mean value: 0.01683349609375
|
|
|
|
key: test_mcc
|
|
value: [0.75261781 0.50709255 0. 0.43033148 0.2508726 0.50709255
|
|
0.25819889 0.33333333 0.3380617 0.16903085]
|
|
|
|
mean value: 0.35466317765122685
|
|
|
|
key: train_mcc
|
|
value: [1. 0.98164982 0.96362411 1. 0.88607221 1.
|
|
0.96362411 1. 0.90284331 0.97259753]
|
|
|
|
mean value: 0.9670411093291738
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.75 0.5 0.70833333 0.625 0.75
|
|
0.625 0.66666667 0.66666667 0.58333333]
|
|
|
|
mean value: 0.675
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.99074074 0.98148148 1. 0.93981481 1.
|
|
0.98148148 1. 0.94907407 0.98611111]
|
|
|
|
mean value: 0.9828703703703704
|
|
|
|
key: test_fscore
|
|
value: [0.88 0.72727273 0.53846154 0.74074074 0.60869565 0.76923077
|
|
0.66666667 0.66666667 0.69230769 0.61538462]
|
|
|
|
mean value: 0.690542706890533
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99082569 0.98181818 1. 0.94323144 1.
|
|
0.98181818 1. 0.95154185 0.98630137]
|
|
|
|
mean value: 0.9835536712841071
|
|
|
|
key: test_precision
|
|
value: [0.84615385 0.8 0.5 0.66666667 0.63636364 0.71428571
|
|
0.6 0.66666667 0.64285714 0.57142857]
|
|
|
|
mean value: 0.6644422244422244
|
|
|
|
key: train_precision
|
|
value: [1. 0.98181818 0.96428571 1. 0.89256198 1.
|
|
0.96428571 1. 0.90756303 0.97297297]
|
|
|
|
mean value: 0.9683487592043742
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.66666667 0.58333333 0.83333333 0.58333333 0.83333333
|
|
0.75 0.66666667 0.75 0.66666667]
|
|
|
|
mean value: 0.725
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.75 0.5 0.70833333 0.625 0.75
|
|
0.625 0.66666667 0.66666667 0.58333333]
|
|
|
|
mean value: 0.675
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99074074 0.98148148 1. 0.93981481 1.
|
|
0.98148148 1. 0.94907407 0.98611111]
|
|
|
|
mean value: 0.9828703703703704
|
|
|
|
key: test_jcc
|
|
value: [0.78571429 0.57142857 0.36842105 0.58823529 0.4375 0.625
|
|
0.5 0.5 0.52941176 0.44444444]
|
|
|
|
mean value: 0.535015541304241
|
|
|
|
key: train_jcc
|
|
value: [1. 0.98181818 0.96428571 1. 0.89256198 1.
|
|
0.96428571 1. 0.90756303 0.97297297]
|
|
|
|
mean value: 0.9683487592043742
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01443124 0.01399136 0.01988292 0.03507066 0.03442311 0.03594494
|
|
0.01563334 0.0141995 0.01398373 0.01407266]
|
|
|
|
mean value: 0.021163344383239746
|
|
|
|
key: score_time
|
|
value: [0.01197124 0.01205635 0.02042127 0.02123404 0.01202655 0.01211452
|
|
0.01195836 0.01187086 0.0119369 0.01197267]
|
|
|
|
mean value: 0.013756275177001953
|
|
|
|
key: test_mcc
|
|
value: [0.75261781 0.60246408 0.45834925 0.60246408 0.41812101 0.58536941
|
|
0.33333333 0.5 0.91986621 0.58536941]
|
|
|
|
mean value: 0.5757954573028512
|
|
|
|
key: train_mcc
|
|
value: [0.85243671 0.88904134 0.90756304 0.87171665 0.88904134 0.88904134
|
|
0.89849486 0.87171665 0.86203543 0.90803041]
|
|
|
|
mean value: 0.8839117784516624
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.79166667 0.70833333 0.79166667 0.70833333 0.79166667
|
|
0.66666667 0.75 0.95833333 0.79166667]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.92592593 0.94444444 0.9537037 0.93518519 0.94444444 0.94444444
|
|
0.94907407 0.93518519 0.93055556 0.9537037 ]
|
|
|
|
mean value: 0.9416666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.76190476 0.75862069 0.81481481 0.72 0.7826087
|
|
0.66666667 0.75 0.96 0.7826087 ]
|
|
|
|
mean value: 0.7866789541737068
|
|
|
|
key: train_fscore
|
|
value: [0.92727273 0.94495413 0.95412844 0.93693694 0.94495413 0.94495413
|
|
0.94977169 0.93693694 0.9321267 0.95454545]
|
|
|
|
mean value: 0.9426581267710425
|
|
|
|
key: test_precision
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_rt.py:135: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_rt.py:138: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.90909091 0.88888889 0.64705882 0.73333333 0.69230769 0.81818182
|
|
0.66666667 0.75 0.92307692 0.81818182]
|
|
|
|
mean value: 0.7846786873257462
|
|
|
|
key: train_precision
|
|
value: [0.91071429 0.93636364 0.94545455 0.9122807 0.93636364 0.93636364
|
|
0.93693694 0.9122807 0.91150442 0.9375 ]
|
|
|
|
mean value: 0.927576250548421
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.66666667 0.91666667 0.91666667 0.75 0.75
|
|
0.66666667 0.75 1. 0.75 ]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_recall
|
|
value: [0.94444444 0.9537037 0.96296296 0.96296296 0.9537037 0.9537037
|
|
0.96296296 0.96296296 0.9537037 0.97222222]
|
|
|
|
mean value: 0.9583333333333333
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.79166667 0.70833333 0.79166667 0.70833333 0.79166667
|
|
0.66666667 0.75 0.95833333 0.79166667]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.92592593 0.94444444 0.9537037 0.93518519 0.94444444 0.94444444
|
|
0.94907407 0.93518519 0.93055556 0.9537037 ]
|
|
|
|
mean value: 0.9416666666666667
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.61538462 0.61111111 0.6875 0.5625 0.64285714
|
|
0.5 0.6 0.92307692 0.64285714]
|
|
|
|
mean value: 0.6554517704517705
|
|
|
|
key: train_jcc
|
|
value: [0.86440678 0.89565217 0.9122807 0.88135593 0.89565217 0.89565217
|
|
0.90434783 0.88135593 0.87288136 0.91304348]
|
|
|
|
mean value: 0.8916628527841343
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.28584599 0.23216796 0.22558236 0.22630048 0.30875087 0.23271036
|
|
0.22683549 0.2345891 0.24051881 0.27153778]
|
|
|
|
mean value: 0.24848392009735107
|
|
|
|
key: score_time
|
|
value: [0.01207423 0.02121472 0.02310538 0.0239768 0.0239315 0.0196178
|
|
0.02267861 0.02345586 0.02220201 0.02112508]
|
|
|
|
mean value: 0.02133820056915283
|
|
|
|
key: test_mcc
|
|
value: [0.75261781 0.58536941 0.64168895 0.58536941 0.60246408 0.60246408
|
|
0.5 0.58536941 0.75261781 0.66666667]
|
|
|
|
mean value: 0.6274627605767488
|
|
|
|
key: train_mcc
|
|
value: [0.74704394 0.78978412 0.77253603 0.75261781 0.77120096 0.77992042
|
|
0.78869542 0.77120096 0.77013788 0.75158034]
|
|
|
|
mean value: 0.7694717888512735
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.79166667 0.79166667 0.79166667 0.79166667 0.79166667
|
|
0.75 0.79166667 0.875 0.83333333]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.87037037 0.89351852 0.88425926 0.875 0.88425926 0.88888889
|
|
0.89351852 0.88425926 0.88425926 0.875 ]
|
|
|
|
mean value: 0.8833333333333333
|
|
|
|
key: test_fscore
|
|
value: [0.88 0.7826087 0.82758621 0.8 0.81481481 0.76190476
|
|
0.75 0.8 0.88 0.83333333]
|
|
|
|
mean value: 0.8130247812601635
|
|
|
|
key: train_fscore
|
|
value: [0.87826087 0.89777778 0.88986784 0.88 0.88888889 0.89285714
|
|
0.89686099 0.88888889 0.88789238 0.87892377]
|
|
|
|
mean value: 0.8880218539432451
|
|
|
|
key: test_precision
|
|
value: [0.84615385 0.81818182 0.70588235 0.76923077 0.73333333 0.88888889
|
|
0.75 0.76923077 0.84615385 0.83333333]
|
|
|
|
mean value: 0.7960388957447782
|
|
|
|
key: train_precision
|
|
value: [0.82786885 0.86324786 0.8487395 0.84615385 0.85470085 0.86206897
|
|
0.86956522 0.85470085 0.86086957 0.85217391]
|
|
|
|
mean value: 0.854008942823017
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.75 1. 0.83333333 0.91666667 0.66666667
|
|
0.75 0.83333333 0.91666667 0.83333333]
|
|
|
|
mean value: 0.8416666666666667
|
|
|
|
key: train_recall
|
|
value: [0.93518519 0.93518519 0.93518519 0.91666667 0.92592593 0.92592593
|
|
0.92592593 0.92592593 0.91666667 0.90740741]
|
|
|
|
mean value: 0.925
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.79166667 0.79166667 0.79166667 0.79166667 0.79166667
|
|
0.75 0.79166667 0.875 0.83333333]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.87037037 0.89351852 0.88425926 0.875 0.88425926 0.88888889
|
|
0.89351852 0.88425926 0.88425926 0.875 ]
|
|
|
|
mean value: 0.8833333333333333
|
|
|
|
key: test_jcc
|
|
value: [0.78571429 0.64285714 0.70588235 0.66666667 0.6875 0.61538462
|
|
0.6 0.66666667 0.78571429 0.71428571]
|
|
|
|
mean value: 0.6870671730230554
|
|
|
|
key: train_jcc
|
|
value: [0.78294574 0.81451613 0.8015873 0.78571429 0.8 0.80645161
|
|
0.81300813 0.8 0.7983871 0.784 ]
|
|
|
|
mean value: 0.7986610292526675
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0308404 0.02457547 0.03144908 0.03146958 0.04254293 0.05157399
|
|
0.04247165 0.04434466 0.05195832 0.03415108]
|
|
|
|
mean value: 0.03853771686553955
|
|
|
|
key: score_time
|
|
value: [0.01679897 0.01194048 0.012079 0.01197195 0.01233697 0.01218081
|
|
0.0123651 0.01207662 0.01558709 0.01403546]
|
|
|
|
mean value: 0.013137245178222656
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.6761234 0.64168895 0.6761234 0.60246408 0.60246408
|
|
0.5 0.58536941 0.83333333 0.58536941]
|
|
|
|
mean value: 0.6288305461987017
|
|
|
|
key: train_mcc
|
|
value: [0.75261781 0.83390548 0.797528 0.77898084 0.82495863 0.77898084
|
|
0.81537425 0.79973188 0.77898084 0.797528 ]
|
|
|
|
mean value: 0.7958586569080919
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.83333333 0.79166667 0.83333333 0.79166667 0.79166667
|
|
0.75 0.79166667 0.91666667 0.79166667]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.875 0.91666667 0.89814815 0.88888889 0.91203704 0.88888889
|
|
0.90740741 0.89814815 0.88888889 0.89814815]
|
|
|
|
mean value: 0.8972222222222223
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.81818182 0.82758621 0.84615385 0.81481481 0.76190476
|
|
0.75 0.8 0.91666667 0.7826087 ]
|
|
|
|
mean value: 0.8100525505922808
|
|
|
|
key: train_fscore
|
|
value: [0.88 0.91818182 0.9009009 0.89189189 0.91402715 0.89189189
|
|
0.90909091 0.90265487 0.89189189 0.9009009 ]
|
|
|
|
mean value: 0.9001432221328108
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.9 0.70588235 0.78571429 0.73333333 0.88888889
|
|
0.75 0.76923077 0.91666667 0.81818182]
|
|
|
|
mean value: 0.8086079933138757
|
|
|
|
key: train_precision
|
|
value: [0.84615385 0.90178571 0.87719298 0.86842105 0.89380531 0.86842105
|
|
0.89285714 0.86440678 0.86842105 0.87719298]
|
|
|
|
mean value: 0.875865791549925
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 1. 0.91666667 0.91666667 0.66666667
|
|
0.75 0.83333333 0.91666667 0.75 ]
|
|
|
|
mean value: 0.825
|
|
|
|
key: train_recall
|
|
value: [0.91666667 0.93518519 0.92592593 0.91666667 0.93518519 0.91666667
|
|
0.92592593 0.94444444 0.91666667 0.92592593]
|
|
|
|
mean value: 0.9259259259259259
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.83333333 0.79166667 0.83333333 0.79166667 0.79166667
|
|
0.75 0.79166667 0.91666667 0.79166667]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.875 0.91666667 0.89814815 0.88888889 0.91203704 0.88888889
|
|
0.90740741 0.89814815 0.88888889 0.89814815]
|
|
|
|
mean value: 0.8972222222222223
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.69230769 0.70588235 0.73333333 0.6875 0.61538462
|
|
0.6 0.66666667 0.84615385 0.64285714]
|
|
|
|
mean value: 0.6832942792501616
|
|
|
|
key: train_jcc
|
|
value: [0.78571429 0.8487395 0.81967213 0.80487805 0.84166667 0.80487805
|
|
0.83333333 0.82258065 0.80487805 0.81967213]
|
|
|
|
mean value: 0.8186012835310441
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.71998596 0.75151277 0.88045883 0.72910571 0.72733355 0.90851116
|
|
0.74258113 0.76826286 0.8899796 0.75839233]
|
|
|
|
mean value: 0.7876123905181884
|
|
|
|
key: score_time
|
|
value: [0.01214218 0.01221347 0.01212549 0.01217508 0.01212287 0.01476812
|
|
0.01231885 0.01283383 0.01213813 0.01209283]
|
|
|
|
mean value: 0.012493085861206055
|
|
|
|
key: test_mcc
|
|
value: [0.75261781 0.60246408 0.64168895 0.70710678 0.64168895 0.50709255
|
|
0.5 0.58536941 0.6761234 0.66666667]
|
|
|
|
mean value: 0.6280818592400688
|
|
|
|
key: train_mcc
|
|
value: [0.74393663 0.88057382 0.77992042 0.70479219 0.75158034 0.94444444
|
|
0.78869542 0.77120096 0.75158034 0.74188651]
|
|
|
|
mean value: 0.78586110875256
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.79166667 0.79166667 0.83333333 0.79166667 0.75
|
|
0.75 0.79166667 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8041666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.87037037 0.93981481 0.88888889 0.85185185 0.875 0.97222222
|
|
0.89351852 0.88425926 0.875 0.87037037]
|
|
|
|
mean value: 0.8921296296296296
|
|
|
|
key: test_fscore
|
|
value: [0.88 0.76190476 0.82758621 0.85714286 0.82758621 0.72727273
|
|
0.75 0.8 0.84615385 0.83333333]
|
|
|
|
mean value: 0.8110979939600629
|
|
|
|
key: train_fscore
|
|
value: [0.87610619 0.94117647 0.89285714 0.85585586 0.87892377 0.97222222
|
|
0.89686099 0.88888889 0.87892377 0.87387387]
|
|
|
|
mean value: 0.8955689169155857
|
|
|
|
key: test_precision
|
|
value: [0.84615385 0.88888889 0.70588235 0.75 0.70588235 0.8
|
|
0.75 0.76923077 0.78571429 0.83333333]
|
|
|
|
mean value: 0.7835085829203476
|
|
|
|
key: train_precision
|
|
value: [0.83898305 0.92035398 0.86206897 0.83333333 0.85217391 0.97222222
|
|
0.86956522 0.85470085 0.85217391 0.85087719]
|
|
|
|
mean value: 0.8706452645382711
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.66666667 1. 1. 1. 0.66666667
|
|
0.75 0.83333333 0.91666667 0.83333333]
|
|
|
|
mean value: 0.8583333333333333
|
|
|
|
key: train_recall
|
|
value: [0.91666667 0.96296296 0.92592593 0.87962963 0.90740741 0.97222222
|
|
0.92592593 0.92592593 0.90740741 0.89814815]
|
|
|
|
mean value: 0.9222222222222223
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.79166667 0.79166667 0.83333333 0.79166667 0.75
|
|
0.75 0.79166667 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8041666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.87037037 0.93981481 0.88888889 0.85185185 0.875 0.97222222
|
|
0.89351852 0.88425926 0.875 0.87037037]
|
|
|
|
mean value: 0.8921296296296296
|
|
|
|
key: test_jcc
|
|
value: [0.78571429 0.61538462 0.70588235 0.75 0.70588235 0.57142857
|
|
0.6 0.66666667 0.73333333 0.71428571]
|
|
|
|
mean value: 0.684857789269554
|
|
|
|
key: train_jcc
|
|
value: [0.77952756 0.88888889 0.80645161 0.7480315 0.784 0.94594595
|
|
0.81300813 0.8 0.784 0.776 ]
|
|
|
|
mean value: 0.8125853632937472
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01518965 0.01046348 0.01070547 0.01042414 0.0102427 0.01024985
|
|
0.00982404 0.01023245 0.01031613 0.01034975]
|
|
|
|
mean value: 0.010799765586853027
|
|
|
|
key: score_time
|
|
value: [0.01212573 0.01018858 0.01014185 0.00970149 0.00969028 0.00975275
|
|
0.00983071 0.00985217 0.0098865 0.00986671]
|
|
|
|
mean value: 0.010103678703308106
|
|
|
|
key: test_mcc
|
|
value: [ 0.38490018 0.43033148 -0.2236068 0.35355339 0.64168895 0.50709255
|
|
0.27500955 0.70710678 0.60246408 0.60246408]
|
|
|
|
mean value: 0.42810042384202684
|
|
|
|
key: train_mcc
|
|
value: [0.49840764 0.50566876 0.54289671 0.51282259 0.48653363 0.52261966
|
|
0.50639215 0.47847133 0.49041703 0.4990914 ]
|
|
|
|
mean value: 0.5043320918458405
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.70833333 0.41666667 0.66666667 0.79166667 0.75
|
|
0.625 0.83333333 0.79166667 0.79166667]
|
|
|
|
mean value: 0.7041666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.73148148 0.71759259 0.75925926 0.75 0.72685185 0.74074074
|
|
0.73611111 0.72222222 0.72685185 0.72685185]
|
|
|
|
mean value: 0.7337962962962963
|
|
|
|
key: test_fscore
|
|
value: [0.73333333 0.74074074 0.5625 0.71428571 0.82758621 0.76923077
|
|
0.68965517 0.85714286 0.81481481 0.81481481]
|
|
|
|
mean value: 0.7524104423673389
|
|
|
|
key: train_fscore
|
|
value: [0.7734375 0.77490775 0.79032258 0.775 0.76862745 0.78294574
|
|
0.77647059 0.765625 0.77042802 0.77394636]
|
|
|
|
mean value: 0.7751710981089905
|
|
|
|
key: test_precision
|
|
value: [0.61111111 0.66666667 0.45 0.625 0.70588235 0.71428571
|
|
0.58823529 0.75 0.73333333 0.73333333]
|
|
|
|
mean value: 0.6577847805788982
|
|
|
|
key: train_precision
|
|
value: [0.66891892 0.64417178 0.7 0.70454545 0.66666667 0.67333333
|
|
0.67346939 0.66216216 0.66442953 0.66013072]
|
|
|
|
mean value: 0.6717827951678332
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.83333333 0.75 0.83333333 1. 0.83333333
|
|
0.83333333 1. 0.91666667 0.91666667]
|
|
|
|
mean value: 0.8833333333333333
|
|
|
|
key: train_recall
|
|
value: [0.91666667 0.97222222 0.90740741 0.86111111 0.90740741 0.93518519
|
|
0.91666667 0.90740741 0.91666667 0.93518519]
|
|
|
|
mean value: 0.9175925925925926
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.70833333 0.41666667 0.66666667 0.79166667 0.75
|
|
0.625 0.83333333 0.79166667 0.79166667]
|
|
|
|
mean value: 0.7041666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.73148148 0.71759259 0.75925926 0.75 0.72685185 0.74074074
|
|
0.73611111 0.72222222 0.72685185 0.72685185]
|
|
|
|
mean value: 0.7337962962962963
|
|
|
|
key: test_jcc
|
|
value: [0.57894737 0.58823529 0.39130435 0.55555556 0.70588235 0.625
|
|
0.52631579 0.75 0.6875 0.6875 ]
|
|
|
|
mean value: 0.6096240708335203
|
|
|
|
key: train_jcc
|
|
value: [0.63057325 0.63253012 0.65333333 0.63265306 0.62420382 0.6433121
|
|
0.63461538 0.62025316 0.62658228 0.63125 ]
|
|
|
|
mean value: 0.6329306514667632
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0104332 0.01062703 0.00978208 0.01074839 0.01041389 0.01024389
|
|
0.01062751 0.01055861 0.01067042 0.01015282]
|
|
|
|
mean value: 0.010425782203674317
|
|
|
|
key: score_time
|
|
value: [0.00950599 0.0098362 0.01021171 0.00980735 0.00950599 0.00997663
|
|
0.00994396 0.00983953 0.01008701 0.00977087]
|
|
|
|
mean value: 0.009848523139953613
|
|
|
|
key: test_mcc
|
|
value: [0.50709255 0.35355339 0.0860663 0.60246408 0.2508726 0.58536941
|
|
0.25819889 0.58536941 0.84515425 0.83333333]
|
|
|
|
mean value: 0.49074742109105457
|
|
|
|
key: train_mcc
|
|
value: [0.63060354 0.62103628 0.65743559 0.64993368 0.66222239 0.62060985
|
|
0.63006192 0.63856099 0.62253572 0.60625994]
|
|
|
|
mean value: 0.6339259890467467
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.66666667 0.54166667 0.79166667 0.625 0.79166667
|
|
0.625 0.79166667 0.91666667 0.91666667]
|
|
|
|
mean value: 0.7416666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.81481481 0.81018519 0.8287037 0.82407407 0.8287037 0.81018519
|
|
0.81481481 0.81481481 0.81018519 0.80092593]
|
|
|
|
mean value: 0.8157407407407408
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.6 0.59259259 0.81481481 0.60869565 0.8
|
|
0.66666667 0.7826087 0.92307692 0.91666667]
|
|
|
|
mean value: 0.7432394738916478
|
|
|
|
key: train_fscore
|
|
value: [0.81981982 0.81447964 0.82949309 0.83035714 0.83842795 0.81278539
|
|
0.81818182 0.82905983 0.81777778 0.81222707]
|
|
|
|
mean value: 0.8222609523224956
|
|
|
|
key: test_precision
|
|
value: [0.8 0.75 0.53333333 0.73333333 0.63636364 0.76923077
|
|
0.6 0.81818182 0.85714286 0.91666667]
|
|
|
|
mean value: 0.7414252414252415
|
|
|
|
key: train_precision
|
|
value: [0.79824561 0.79646018 0.82568807 0.80172414 0.79338843 0.8018018
|
|
0.80357143 0.76984127 0.78632479 0.76859504]
|
|
|
|
mean value: 0.7945640759965436
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.5 0.66666667 0.91666667 0.58333333 0.83333333
|
|
0.75 0.75 1. 0.91666667]
|
|
|
|
mean value: 0.7583333333333333
|
|
|
|
key: train_recall
|
|
value: [0.84259259 0.83333333 0.83333333 0.86111111 0.88888889 0.82407407
|
|
0.83333333 0.89814815 0.85185185 0.86111111]
|
|
|
|
mean value: 0.8527777777777777
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.66666667 0.54166667 0.79166667 0.625 0.79166667
|
|
0.625 0.79166667 0.91666667 0.91666667]
|
|
|
|
mean value: 0.7416666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.81481481 0.81018519 0.8287037 0.82407407 0.8287037 0.81018519
|
|
0.81481481 0.81481481 0.81018519 0.80092593]
|
|
|
|
mean value: 0.8157407407407408
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.42857143 0.42105263 0.6875 0.4375 0.66666667
|
|
0.5 0.64285714 0.85714286 0.84615385]
|
|
|
|
mean value: 0.605887314439946
|
|
|
|
key: train_jcc
|
|
value: [0.69465649 0.6870229 0.70866142 0.70992366 0.72180451 0.68461538
|
|
0.69230769 0.7080292 0.69172932 0.68382353]
|
|
|
|
mean value: 0.6982574108759549
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00942445 0.01014233 0.00951648 0.01010513 0.00946379 0.01008248
|
|
0.01012278 0.01024103 0.0107336 0.01002169]
|
|
|
|
mean value: 0.00998537540435791
|
|
|
|
key: score_time
|
|
value: [0.01131845 0.011024 0.01125288 0.01072717 0.01059318 0.01117182
|
|
0.01103806 0.01099634 0.01130033 0.011235 ]
|
|
|
|
mean value: 0.01106572151184082
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.58536941 0. 0.33333333 0.43033148 0.83333333
|
|
0.0860663 0.3380617 0.6761234 0.50709255]
|
|
|
|
mean value: 0.4375080918682246
|
|
|
|
key: train_mcc
|
|
value: [0.62361342 0.60774211 0.64356824 0.64993368 0.58760578 0.58760578
|
|
0.64514162 0.59763515 0.63355259 0.61491869]
|
|
|
|
mean value: 0.6191317076001605
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.79166667 0.5 0.66666667 0.70833333 0.91666667
|
|
0.54166667 0.66666667 0.83333333 0.75 ]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.81018519 0.80092593 0.81944444 0.82407407 0.79166667 0.79166667
|
|
0.81944444 0.7962963 0.81481481 0.80555556]
|
|
|
|
mean value: 0.8074074074074074
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.8 0.57142857 0.66666667 0.74074074 0.91666667
|
|
0.59259259 0.69230769 0.84615385 0.72727273]
|
|
|
|
mean value: 0.7336438199481677
|
|
|
|
key: train_fscore
|
|
value: [0.81938326 0.81385281 0.82969432 0.83035714 0.80349345 0.80349345
|
|
0.83116883 0.80869565 0.8245614 0.81578947]
|
|
|
|
mean value: 0.8180489799865002
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.76923077 0.5 0.66666667 0.66666667 0.91666667
|
|
0.53333333 0.64285714 0.78571429 0.8 ]
|
|
|
|
mean value: 0.7099317349317349
|
|
|
|
key: train_precision
|
|
value: [0.78151261 0.76422764 0.78512397 0.80172414 0.76033058 0.76033058
|
|
0.7804878 0.76229508 0.78333333 0.775 ]
|
|
|
|
mean value: 0.7754365729395012
|
|
|
|
key: test_recall
|
|
value: [0.75 0.83333333 0.66666667 0.66666667 0.83333333 0.91666667
|
|
0.66666667 0.75 0.91666667 0.66666667]
|
|
|
|
mean value: 0.7666666666666666
|
|
|
|
key: train_recall
|
|
value: [0.86111111 0.87037037 0.87962963 0.86111111 0.85185185 0.85185185
|
|
0.88888889 0.86111111 0.87037037 0.86111111]
|
|
|
|
mean value: 0.8657407407407407
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.79166667 0.5 0.66666667 0.70833333 0.91666667
|
|
0.54166667 0.66666667 0.83333333 0.75 ]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.81018519 0.80092593 0.81944444 0.82407407 0.79166667 0.79166667
|
|
0.81944444 0.7962963 0.81481481 0.80555556]
|
|
|
|
mean value: 0.8074074074074074
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.66666667 0.4 0.5 0.58823529 0.84615385
|
|
0.42105263 0.52941176 0.73333333 0.57142857]
|
|
|
|
mean value: 0.5899139250842037
|
|
|
|
key: train_jcc
|
|
value: [0.69402985 0.68613139 0.70895522 0.70992366 0.67153285 0.67153285
|
|
0.71111111 0.67883212 0.70149254 0.68888889]
|
|
|
|
mean value: 0.6922430473142728
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01471233 0.01398039 0.0138483 0.01294971 0.01366282 0.01385617
|
|
0.01302624 0.01307869 0.01325321 0.01405549]
|
|
|
|
mean value: 0.013642334938049316
|
|
|
|
key: score_time
|
|
value: [0.01138425 0.01074481 0.01006532 0.01083374 0.01002431 0.01079631
|
|
0.01036167 0.01062918 0.01029849 0.01043081]
|
|
|
|
mean value: 0.010556888580322266
|
|
|
|
key: test_mcc
|
|
value: [0.75261781 0.66666667 0.35355339 0.50709255 0.43033148 0.58536941
|
|
0.50709255 0.6761234 0.58536941 0.75261781]
|
|
|
|
mean value: 0.5816834481651599
|
|
|
|
key: train_mcc
|
|
value: [0.77603992 0.77822 0.7741473 0.77120096 0.80976668 0.78439613
|
|
0.79280145 0.75549315 0.78262379 0.76572003]
|
|
|
|
mean value: 0.7790409438143523
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.83333333 0.66666667 0.75 0.70833333 0.79166667
|
|
0.75 0.83333333 0.79166667 0.875 ]
|
|
|
|
mean value: 0.7875
|
|
|
|
key: train_accuracy
|
|
value: [0.88425926 0.88425926 0.88425926 0.88425926 0.90277778 0.88888889
|
|
0.89351852 0.875 0.88888889 0.87962963]
|
|
|
|
mean value: 0.8865740740740741
|
|
|
|
key: test_fscore
|
|
value: [0.88 0.83333333 0.71428571 0.76923077 0.74074074 0.7826087
|
|
0.76923077 0.84615385 0.8 0.88 ]
|
|
|
|
mean value: 0.8015583868627346
|
|
|
|
key: train_fscore
|
|
value: [0.89177489 0.89270386 0.89082969 0.88888889 0.90748899 0.89565217
|
|
0.89956332 0.88209607 0.89473684 0.88695652]
|
|
|
|
mean value: 0.8930691250835735
|
|
|
|
key: test_precision
|
|
value: [0.84615385 0.83333333 0.625 0.71428571 0.66666667 0.81818182
|
|
0.71428571 0.78571429 0.76923077 0.84615385]
|
|
|
|
mean value: 0.7619005994005994
|
|
|
|
key: train_precision
|
|
value: [0.83739837 0.832 0.84297521 0.85470085 0.86554622 0.8442623
|
|
0.85123967 0.83471074 0.85 0.83606557]
|
|
|
|
mean value: 0.8448898935859159
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.83333333 0.83333333 0.83333333 0.83333333 0.75
|
|
0.83333333 0.91666667 0.83333333 0.91666667]
|
|
|
|
mean value: 0.85
|
|
|
|
key: train_recall
|
|
value: [0.9537037 0.96296296 0.94444444 0.92592593 0.9537037 0.9537037
|
|
0.9537037 0.93518519 0.94444444 0.94444444]
|
|
|
|
mean value: 0.9472222222222222
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.83333333 0.66666667 0.75 0.70833333 0.79166667
|
|
0.75 0.83333333 0.79166667 0.875 ]
|
|
|
|
mean value: 0.7875000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [0.88425926 0.88425926 0.88425926 0.88425926 0.90277778 0.88888889
|
|
0.89351852 0.875 0.88888889 0.87962963]
|
|
|
|
mean value: 0.8865740740740741
|
|
|
|
key: test_jcc
|
|
value: [0.78571429 0.71428571 0.55555556 0.625 0.58823529 0.64285714
|
|
0.625 0.73333333 0.66666667 0.78571429]
|
|
|
|
mean value: 0.6722362278244631
|
|
|
|
key: train_jcc
|
|
value: [0.8046875 0.80620155 0.80314961 0.8 0.83064516 0.81102362
|
|
0.81746032 0.7890625 0.80952381 0.796875 ]
|
|
|
|
mean value: 0.8068629067008504
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.62048364 0.84822226 0.81147861 1.21917677 1.00702286 0.98516321
|
|
1.01889133 1.1750536 1.02101469 0.70334888]
|
|
|
|
mean value: 0.9409855842590332
|
|
|
|
key: score_time
|
|
value: [0.0172441 0.01233554 0.01252532 0.0141468 0.01485038 0.01244426
|
|
0.0170753 0.01252484 0.0150919 0.0125711 ]
|
|
|
|
mean value: 0.014080953598022462
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.45834925 0.45834925 0.75261781 0.58536941 0.60246408
|
|
0.5 0.58536941 0.91986621 0.50709255]
|
|
|
|
mean value: 0.5954847366971103
|
|
|
|
key: train_mcc
|
|
value: [0.87051965 0.95374459 0.92608473 0.9459053 0.98164982 0.92592593
|
|
0.94460643 0.9722639 0.96296296 0.88888889]
|
|
|
|
mean value: 0.9372552198040212
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.70833333 0.70833333 0.875 0.79166667 0.79166667
|
|
0.75 0.79166667 0.95833333 0.75 ]
|
|
|
|
mean value: 0.7916666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.93518519 0.97685185 0.96296296 0.97222222 0.99074074 0.96296296
|
|
0.97222222 0.98611111 0.98148148 0.94444444]
|
|
|
|
mean value: 0.9685185185185186
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.63157895 0.75862069 0.86956522 0.7826087 0.76190476
|
|
0.75 0.8 0.95652174 0.72727273]
|
|
|
|
mean value: 0.7820681474027169
|
|
|
|
key: train_fscore
|
|
value: [0.93457944 0.97695853 0.96330275 0.97142857 0.99065421 0.96296296
|
|
0.97196262 0.98604651 0.98148148 0.94444444]
|
|
|
|
mean value: 0.968382151126681
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.85714286 0.64705882 0.90909091 0.81818182 0.88888889
|
|
0.75 0.76923077 1. 0.8 ]
|
|
|
|
mean value: 0.8257775884246472
|
|
|
|
key: train_precision
|
|
value: [0.94339623 0.97247706 0.95454545 1. 1. 0.96296296
|
|
0.98113208 0.99065421 0.98148148 0.94444444]
|
|
|
|
mean value: 0.9731093915148796
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.91666667 0.83333333 0.75 0.66666667
|
|
0.75 0.83333333 0.91666667 0.66666667]
|
|
|
|
mean value: 0.7583333333333333
|
|
|
|
key: train_recall
|
|
value: [0.92592593 0.98148148 0.97222222 0.94444444 0.98148148 0.96296296
|
|
0.96296296 0.98148148 0.98148148 0.94444444]
|
|
|
|
mean value: 0.9638888888888889
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.70833333 0.70833333 0.875 0.79166667 0.79166667
|
|
0.75 0.79166667 0.95833333 0.75 ]
|
|
|
|
mean value: 0.7916666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [0.93518519 0.97685185 0.96296296 0.97222222 0.99074074 0.96296296
|
|
0.97222222 0.98611111 0.98148148 0.94444444]
|
|
|
|
mean value: 0.9685185185185186
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.46153846 0.61111111 0.76923077 0.64285714 0.61538462
|
|
0.6 0.66666667 0.91666667 0.57142857]
|
|
|
|
mean value: 0.6497741147741147
|
|
|
|
key: train_jcc
|
|
value: [0.87719298 0.95495495 0.92920354 0.94444444 0.98148148 0.92857143
|
|
0.94545455 0.97247706 0.96363636 0.89473684]
|
|
|
|
mean value: 0.9392153647147814
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02163959 0.01607132 0.0166142 0.01710081 0.01704025 0.01535535
|
|
0.01831055 0.01811218 0.01655841 0.01498246]
|
|
|
|
mean value: 0.01717851161956787
|
|
|
|
key: score_time
|
|
value: [0.01209736 0.0092988 0.00942469 0.01364398 0.00921535 0.00895262
|
|
0.00913954 0.00918198 0.00914049 0.00899124]
|
|
|
|
mean value: 0.009908604621887206
|
|
|
|
key: test_mcc
|
|
value: [0.66666667 0.6761234 0.1767767 0.16903085 0.1767767 0.41812101
|
|
0.41812101 0.43033148 0.50709255 0.6761234 ]
|
|
|
|
mean value: 0.43151637615274063
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.83333333 0.58333333 0.58333333 0.58333333 0.70833333
|
|
0.70833333 0.70833333 0.75 0.83333333]
|
|
|
|
mean value: 0.7125
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.81818182 0.64285714 0.54545455 0.64285714 0.69565217
|
|
0.72 0.74074074 0.76923077 0.81818182]
|
|
|
|
mean value: 0.7226489484750355
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.9 0.5625 0.6 0.5625 0.72727273
|
|
0.69230769 0.66666667 0.71428571 0.9 ]
|
|
|
|
mean value: 0.7158866133866134
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.75 0.75 0.5 0.75 0.66666667
|
|
0.75 0.83333333 0.83333333 0.75 ]
|
|
|
|
mean value: 0.7416666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.83333333 0.58333333 0.58333333 0.58333333 0.70833333
|
|
0.70833333 0.70833333 0.75 0.83333333]
|
|
|
|
mean value: 0.7125
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.69230769 0.47368421 0.375 0.47368421 0.53333333
|
|
0.5625 0.58823529 0.625 0.69230769]
|
|
|
|
mean value: 0.5730338147404711
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09890485 0.10148096 0.10069036 0.10080361 0.1014545 0.10283852
|
|
0.10187411 0.10599065 0.10443878 0.1030066 ]
|
|
|
|
mean value: 0.10214829444885254
|
|
|
|
key: score_time
|
|
value: [0.01787829 0.01847506 0.01830387 0.01904178 0.01925802 0.01807094
|
|
0.01921487 0.01858139 0.01915097 0.01860642]
|
|
|
|
mean value: 0.018658161163330078
|
|
|
|
key: test_mcc
|
|
value: [0.5 0.58536941 0.43033148 0.58536941 0.41812101 0.6761234
|
|
0.41812101 0.50709255 0.84515425 0.41812101]
|
|
|
|
mean value: 0.5383803523280938
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.79166667 0.70833333 0.79166667 0.70833333 0.83333333
|
|
0.70833333 0.75 0.91666667 0.70833333]
|
|
|
|
mean value: 0.7666666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.7826087 0.74074074 0.8 0.72 0.81818182
|
|
0.72 0.76923077 0.92307692 0.72 ]
|
|
|
|
mean value: 0.7743838946882424
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.81818182 0.66666667 0.76923077 0.69230769 0.9
|
|
0.69230769 0.71428571 0.85714286 0.69230769]
|
|
|
|
mean value: 0.7552430902430902
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.83333333 0.83333333 0.75 0.75
|
|
0.75 0.83333333 1. 0.75 ]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.79166667 0.70833333 0.79166667 0.70833333 0.83333333
|
|
0.70833333 0.75 0.91666667 0.70833333]
|
|
|
|
mean value: 0.7666666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.64285714 0.58823529 0.66666667 0.5625 0.69230769
|
|
0.5625 0.625 0.85714286 0.5625 ]
|
|
|
|
mean value: 0.6359709653092006
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01117754 0.00970054 0.00896549 0.00912547 0.00895023 0.00913262
|
|
0.00898814 0.00906181 0.0092175 0.00906587]
|
|
|
|
mean value: 0.00933852195739746
|
|
|
|
key: score_time
|
|
value: [0.00911736 0.00853801 0.0085423 0.00858045 0.00854707 0.00860143
|
|
0.00853682 0.00857329 0.00868487 0.00865126]
|
|
|
|
mean value: 0.008637285232543946
|
|
|
|
key: test_mcc
|
|
value: [ 0.58536941 -0.0836242 0.3380617 0.43033148 0.33333333 0.16666667
|
|
-0.0836242 0.2508726 0.70710678 0.25819889]
|
|
|
|
mean value: 0.2902692463742723
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.45833333 0.66666667 0.70833333 0.66666667 0.58333333
|
|
0.45833333 0.625 0.83333333 0.625 ]
|
|
|
|
mean value: 0.6416666666666666
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.43478261 0.63636364 0.66666667 0.66666667 0.58333333
|
|
0.43478261 0.60869565 0.8 0.57142857]
|
|
|
|
mean value: 0.6185328439676265
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.45454545 0.7 0.77777778 0.66666667 0.58333333
|
|
0.45454545 0.63636364 1. 0.66666667]
|
|
|
|
mean value: 0.6758080808080809
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.41666667 0.58333333 0.58333333 0.66666667 0.58333333
|
|
0.41666667 0.58333333 0.66666667 0.5 ]
|
|
|
|
mean value: 0.575
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.45833333 0.66666667 0.70833333 0.66666667 0.58333333
|
|
0.45833333 0.625 0.83333333 0.625 ]
|
|
|
|
mean value: 0.6416666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.27777778 0.46666667 0.5 0.5 0.41176471
|
|
0.27777778 0.4375 0.66666667 0.4 ]
|
|
|
|
mean value: 0.4581010737628385
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.09
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.30116844 1.30090857 1.57670236 1.31556249 1.32972836 1.33339977
|
|
1.34411907 1.32579279 1.33161736 1.35981679]
|
|
|
|
mean value: 1.3518815994262696
|
|
|
|
key: score_time
|
|
value: [0.08967829 0.08942246 0.09675574 0.09040046 0.09746838 0.09508467
|
|
0.10231495 0.09039664 0.09650874 0.09847116]
|
|
|
|
mean value: 0.09465014934539795
|
|
|
|
key: test_mcc
|
|
value: [0.75261781 0.57735027 0.60246408 0.66666667 0.6761234 0.64168895
|
|
0.33333333 0.43033148 0.75261781 0.41812101]
|
|
|
|
mean value: 0.5851314802897141
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.75 0.79166667 0.83333333 0.83333333 0.79166667
|
|
0.66666667 0.70833333 0.875 0.70833333]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.66666667 0.81481481 0.83333333 0.84615385 0.73684211
|
|
0.66666667 0.74074074 0.88 0.69565217]
|
|
|
|
mean value: 0.7750435564943574
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.90909091 1. 0.73333333 0.83333333 0.78571429 1.
|
|
0.66666667 0.66666667 0.84615385 0.72727273]
|
|
|
|
mean value: 0.8168231768231768
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.5 0.91666667 0.83333333 0.91666667 0.58333333
|
|
0.66666667 0.83333333 0.91666667 0.66666667]
|
|
|
|
mean value: 0.7666666666666666
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.75 0.79166667 0.83333333 0.83333333 0.79166667
|
|
0.66666667 0.70833333 0.875 0.70833333]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.5 0.6875 0.71428571 0.73333333 0.58333333
|
|
0.5 0.58823529 0.78571429 0.53333333]
|
|
|
|
mean value: 0.6394966063348416
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.8898294 0.87875557 0.97417212 0.86813331 0.89023066 0.9033277
|
|
0.90997934 0.94354987 0.91526437 0.98898816]
|
|
|
|
mean value: 0.9162230491638184
|
|
|
|
key: score_time
|
|
value: [0.19945073 0.18877649 0.24253082 0.20921898 0.23907328 0.16415858
|
|
0.2325561 0.2260251 0.23510647 0.21560669]
|
|
|
|
mean value: 0.2152503252029419
|
|
|
|
key: test_mcc
|
|
value: [0.6761234 0.53033009 0.70710678 0.66666667 0.6761234 0.64168895
|
|
0.50709255 0.50709255 0.75261781 0.5 ]
|
|
|
|
mean value: 0.6164842203909101
|
|
|
|
key: train_mcc
|
|
value: [0.90756304 0.88949918 0.88904134 0.90756304 0.93554619 0.88904134
|
|
0.90756304 0.92608473 0.90803041 0.92656165]
|
|
|
|
mean value: 0.9086493966270899
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.75 0.83333333 0.83333333 0.83333333 0.79166667
|
|
0.75 0.75 0.875 0.75 ]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_accuracy
|
|
value: [0.9537037 0.94444444 0.94444444 0.9537037 0.96759259 0.94444444
|
|
0.9537037 0.96296296 0.9537037 0.96296296]
|
|
|
|
mean value: 0.9541666666666666
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.7 0.85714286 0.83333333 0.84615385 0.73684211
|
|
0.72727273 0.76923077 0.86956522 0.75 ]
|
|
|
|
mean value: 0.7907722673969814
|
|
|
|
key: train_fscore
|
|
value: [0.95412844 0.94545455 0.94495413 0.95412844 0.96803653 0.94495413
|
|
0.95412844 0.96330275 0.95454545 0.96363636]
|
|
|
|
mean value: 0.9547269223591959
|
|
|
|
key: test_precision
|
|
value: [0.9 0.875 0.75 0.83333333 0.78571429 1.
|
|
0.8 0.71428571 0.90909091 0.75 ]
|
|
|
|
mean value: 0.8317424242424243
|
|
|
|
key: train_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.94545455 0.92857143 0.93636364 0.94545455 0.95495495 0.93636364
|
|
0.94545455 0.95454545 0.9375 0.94642857]
|
|
|
|
mean value: 0.9431091318591318
|
|
|
|
key: test_recall
|
|
value: [0.75 0.58333333 1. 0.83333333 0.91666667 0.58333333
|
|
0.66666667 0.83333333 0.83333333 0.75 ]
|
|
|
|
mean value: 0.775
|
|
|
|
key: train_recall
|
|
value: [0.96296296 0.96296296 0.9537037 0.96296296 0.98148148 0.9537037
|
|
0.96296296 0.97222222 0.97222222 0.98148148]
|
|
|
|
mean value: 0.9666666666666667
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.75 0.83333333 0.83333333 0.83333333 0.79166667
|
|
0.75 0.75 0.875 0.75 ]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_roc_auc
|
|
value: [0.9537037 0.94444444 0.94444444 0.9537037 0.96759259 0.94444444
|
|
0.9537037 0.96296296 0.9537037 0.96296296]
|
|
|
|
mean value: 0.9541666666666666
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.53846154 0.75 0.71428571 0.73333333 0.58333333
|
|
0.57142857 0.625 0.76923077 0.6 ]
|
|
|
|
mean value: 0.6577380952380952
|
|
|
|
key: train_jcc
|
|
value: [0.9122807 0.89655172 0.89565217 0.9122807 0.9380531 0.89565217
|
|
0.9122807 0.92920354 0.91304348 0.92982456]
|
|
|
|
mean value: 0.9134822854059695
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02481365 0.00924039 0.00934982 0.009166 0.010216 0.01012945
|
|
0.01021767 0.01023412 0.00972962 0.00938153]
|
|
|
|
mean value: 0.011247825622558594
|
|
|
|
key: score_time
|
|
value: [0.01276445 0.0087719 0.00875235 0.00882077 0.00955319 0.00952816
|
|
0.00946164 0.00956464 0.00878978 0.00889206]
|
|
|
|
mean value: 0.009489893913269043
|
|
|
|
key: test_mcc
|
|
value: [0.50709255 0.35355339 0.0860663 0.60246408 0.2508726 0.58536941
|
|
0.25819889 0.58536941 0.84515425 0.83333333]
|
|
|
|
mean value: 0.49074742109105457
|
|
|
|
key: train_mcc
|
|
value: [0.63060354 0.62103628 0.65743559 0.64993368 0.66222239 0.62060985
|
|
0.63006192 0.63856099 0.62253572 0.60625994]
|
|
|
|
mean value: 0.6339259890467467
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.66666667 0.54166667 0.79166667 0.625 0.79166667
|
|
0.625 0.79166667 0.91666667 0.91666667]
|
|
|
|
mean value: 0.7416666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.81481481 0.81018519 0.8287037 0.82407407 0.8287037 0.81018519
|
|
0.81481481 0.81481481 0.81018519 0.80092593]
|
|
|
|
mean value: 0.8157407407407408
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.6 0.59259259 0.81481481 0.60869565 0.8
|
|
0.66666667 0.7826087 0.92307692 0.91666667]
|
|
|
|
mean value: 0.7432394738916478
|
|
|
|
key: train_fscore
|
|
value: [0.81981982 0.81447964 0.82949309 0.83035714 0.83842795 0.81278539
|
|
0.81818182 0.82905983 0.81777778 0.81222707]
|
|
|
|
mean value: 0.8222609523224956
|
|
|
|
key: test_precision
|
|
value: [0.8 0.75 0.53333333 0.73333333 0.63636364 0.76923077
|
|
0.6 0.81818182 0.85714286 0.91666667]
|
|
|
|
mean value: 0.7414252414252415
|
|
|
|
key: train_precision
|
|
value: [0.79824561 0.79646018 0.82568807 0.80172414 0.79338843 0.8018018
|
|
0.80357143 0.76984127 0.78632479 0.76859504]
|
|
|
|
mean value: 0.7945640759965436
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.5 0.66666667 0.91666667 0.58333333 0.83333333
|
|
0.75 0.75 1. 0.91666667]
|
|
|
|
mean value: 0.7583333333333333
|
|
|
|
key: train_recall
|
|
value: [0.84259259 0.83333333 0.83333333 0.86111111 0.88888889 0.82407407
|
|
0.83333333 0.89814815 0.85185185 0.86111111]
|
|
|
|
mean value: 0.8527777777777777
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.66666667 0.54166667 0.79166667 0.625 0.79166667
|
|
0.625 0.79166667 0.91666667 0.91666667]
|
|
|
|
mean value: 0.7416666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.81481481 0.81018519 0.8287037 0.82407407 0.8287037 0.81018519
|
|
0.81481481 0.81481481 0.81018519 0.80092593]
|
|
|
|
mean value: 0.8157407407407408
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.42857143 0.42105263 0.6875 0.4375 0.66666667
|
|
0.5 0.64285714 0.85714286 0.84615385]
|
|
|
|
mean value: 0.605887314439946
|
|
|
|
key: train_jcc
|
|
value: [0.69465649 0.6870229 0.70866142 0.70992366 0.72180451 0.68461538
|
|
0.69230769 0.7080292 0.69172932 0.68382353]
|
|
|
|
mean value: 0.6982574108759549
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.13619304 0.05510259 0.05758572 0.06099463 0.0575664 0.0591476
|
|
0.06102586 0.07658863 0.06151056 0.06288791]
|
|
|
|
mean value: 0.06886029243469238
|
|
|
|
key: score_time
|
|
value: [0.01118422 0.01046395 0.01036596 0.0104425 0.01049066 0.01043773
|
|
0.01057792 0.01159239 0.01076841 0.01087976]
|
|
|
|
mean value: 0.010720348358154297
|
|
|
|
key: test_mcc
|
|
value: [0.83333333 0.6761234 0.70710678 0.75261781 0.75261781 0.53033009
|
|
0.66666667 0.5 0.75261781 0.50709255]
|
|
|
|
mean value: 0.6678506250715527
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91666667 0.83333333 0.83333333 0.875 0.875 0.75
|
|
0.83333333 0.75 0.875 0.75 ]
|
|
|
|
mean value: 0.8291666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.91666667 0.81818182 0.85714286 0.88 0.86956522 0.7
|
|
0.83333333 0.75 0.88 0.72727273]
|
|
|
|
mean value: 0.8232162619988707
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.91666667 0.9 0.75 0.84615385 0.90909091 0.875
|
|
0.83333333 0.75 0.84615385 0.8 ]
|
|
|
|
mean value: 0.8426398601398601
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.75 1. 0.91666667 0.83333333 0.58333333
|
|
0.83333333 0.75 0.91666667 0.66666667]
|
|
|
|
mean value: 0.8166666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.91666667 0.83333333 0.83333333 0.875 0.875 0.75
|
|
0.83333333 0.75 0.875 0.75 ]
|
|
|
|
mean value: 0.8291666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.84615385 0.69230769 0.75 0.78571429 0.76923077 0.53846154
|
|
0.71428571 0.6 0.78571429 0.57142857]
|
|
|
|
mean value: 0.7053296703296703
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03201962 0.06125259 0.06060815 0.06064439 0.05987048 0.06112409
|
|
0.06051135 0.07771921 0.05222392 0.07174587]
|
|
|
|
mean value: 0.0597719669342041
|
|
|
|
key: score_time
|
|
value: [0.02101016 0.0216496 0.02375889 0.02242851 0.02114177 0.02297425
|
|
0.02142954 0.02500796 0.02404189 0.02397108]
|
|
|
|
mean value: 0.022741365432739257
|
|
|
|
key: test_mcc
|
|
value: [0.66666667 0.38490018 0.16903085 0.5 0.16666667 0.58536941
|
|
0.60246408 0.50709255 0.75261781 0.2508726 ]
|
|
|
|
mean value: 0.4585680811666079
|
|
|
|
key: train_mcc
|
|
value: [0.92608473 0.96312812 0.94444444 0.95374459 0.94460643 0.94444444
|
|
0.95374459 0.96312812 0.95374459 0.97259753]
|
|
|
|
mean value: 0.9519667586332593
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.66666667 0.58333333 0.75 0.58333333 0.79166667
|
|
0.79166667 0.75 0.875 0.625 ]
|
|
|
|
mean value: 0.725
|
|
|
|
key: train_accuracy
|
|
value: [0.96296296 0.98148148 0.97222222 0.97685185 0.97222222 0.97222222
|
|
0.97685185 0.98148148 0.97685185 0.98611111]
|
|
|
|
mean value: 0.975925925925926
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.73333333 0.61538462 0.75 0.58333333 0.8
|
|
0.81481481 0.72727273 0.86956522 0.64 ]
|
|
|
|
mean value: 0.7367037374863462
|
|
|
|
key: train_fscore
|
|
value: [0.96330275 0.98165138 0.97222222 0.97695853 0.97247706 0.97222222
|
|
0.97674419 0.98165138 0.97695853 0.98630137]
|
|
|
|
mean value: 0.9760489619852554
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.61111111 0.57142857 0.75 0.58333333 0.76923077
|
|
0.73333333 0.8 0.90909091 0.61538462]
|
|
|
|
mean value: 0.7176245976245976
|
|
|
|
key: train_precision
|
|
value: [0.95454545 0.97272727 0.97222222 0.97247706 0.96363636 0.97222222
|
|
0.98130841 0.97272727 0.97247706 0.97297297]
|
|
|
|
mean value: 0.9707316320709102
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.91666667 0.66666667 0.75 0.58333333 0.83333333
|
|
0.91666667 0.66666667 0.83333333 0.66666667]
|
|
|
|
mean value: 0.7666666666666666
|
|
|
|
key: train_recall
|
|
value: [0.97222222 0.99074074 0.97222222 0.98148148 0.98148148 0.97222222
|
|
0.97222222 0.99074074 0.98148148 1. ]
|
|
|
|
mean value: 0.9814814814814815
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.66666667 0.58333333 0.75 0.58333333 0.79166667
|
|
0.79166667 0.75 0.875 0.625 ]
|
|
|
|
mean value: 0.725
|
|
|
|
key: train_roc_auc
|
|
value: [0.96296296 0.98148148 0.97222222 0.97685185 0.97222222 0.97222222
|
|
0.97685185 0.98148148 0.97685185 0.98611111]
|
|
|
|
mean value: 0.9759259259259259
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.57894737 0.44444444 0.6 0.41176471 0.66666667
|
|
0.6875 0.57142857 0.76923077 0.47058824]
|
|
|
|
mean value: 0.5914856475653689
|
|
|
|
key: train_jcc
|
|
value: [0.92920354 0.96396396 0.94594595 0.95495495 0.94642857 0.94594595
|
|
0.95454545 0.96396396 0.95495495 0.97297297]
|
|
|
|
mean value: 0.9532880268499737
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02389264 0.00940037 0.00887752 0.0089426 0.00882745 0.00888252
|
|
0.00894308 0.00896382 0.00901222 0.00887132]
|
|
|
|
mean value: 0.01046135425567627
|
|
|
|
key: score_time
|
|
value: [0.01013374 0.00891423 0.00849485 0.0085175 0.00852847 0.00855064
|
|
0.00857592 0.00854993 0.00858283 0.00857258]
|
|
|
|
mean value: 0.008742070198059082
|
|
|
|
key: test_mcc
|
|
value: [0.43033148 0.41812101 0. 0.45834925 0.60246408 0.41812101
|
|
0.50709255 0.58536941 0.3380617 0.66666667]
|
|
|
|
mean value: 0.44245771459099875
|
|
|
|
key: train_mcc
|
|
value: [0.48685383 0.48557856 0.50557897 0.50425466 0.48685383 0.47568087
|
|
0.48557856 0.46812868 0.49554356 0.49693566]
|
|
|
|
mean value: 0.489098717880917
|
|
|
|
key: test_accuracy
|
|
value: [0.70833333 0.70833333 0.5 0.70833333 0.79166667 0.70833333
|
|
0.75 0.79166667 0.66666667 0.83333333]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.74074074 0.74074074 0.75 0.75 0.74074074 0.73611111
|
|
0.74074074 0.73148148 0.74537037 0.74537037]
|
|
|
|
mean value: 0.7421296296296296
|
|
|
|
key: test_fscore
|
|
value: [0.74074074 0.72 0.57142857 0.75862069 0.81481481 0.72
|
|
0.76923077 0.8 0.63636364 0.83333333]
|
|
|
|
mean value: 0.7364532555567038
|
|
|
|
key: train_fscore
|
|
value: [0.75862069 0.75652174 0.76724138 0.76521739 0.75862069 0.7510917
|
|
0.75652174 0.75 0.76190476 0.7639485 ]
|
|
|
|
mean value: 0.7589688591001514
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.69230769 0.5 0.64705882 0.73333333 0.69230769
|
|
0.71428571 0.76923077 0.7 0.83333333]
|
|
|
|
mean value: 0.6948524024994613
|
|
|
|
key: train_precision
|
|
value: [0.70967742 0.71311475 0.71774194 0.72131148 0.70967742 0.7107438
|
|
0.71311475 0.7016129 0.71544715 0.712 ]
|
|
|
|
mean value: 0.712444161715035
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.75 0.66666667 0.91666667 0.91666667 0.75
|
|
0.83333333 0.83333333 0.58333333 0.83333333]
|
|
|
|
mean value: 0.7916666666666666
|
|
|
|
key: train_recall
|
|
value: [0.81481481 0.80555556 0.82407407 0.81481481 0.81481481 0.7962963
|
|
0.80555556 0.80555556 0.81481481 0.82407407]
|
|
|
|
mean value: 0.812037037037037
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.70833333 0.5 0.70833333 0.79166667 0.70833333
|
|
0.75 0.79166667 0.66666667 0.83333333]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.74074074 0.74074074 0.75 0.75 0.74074074 0.73611111
|
|
0.74074074 0.73148148 0.74537037 0.74537037]
|
|
|
|
mean value: 0.7421296296296297
|
|
|
|
key: test_jcc
|
|
value: [0.58823529 0.5625 0.4 0.61111111 0.6875 0.5625
|
|
0.625 0.66666667 0.46666667 0.71428571]
|
|
|
|
mean value: 0.5884465452847806
|
|
|
|
key: train_jcc
|
|
value: [0.61111111 0.60839161 0.62237762 0.61971831 0.61111111 0.6013986
|
|
0.60839161 0.6 0.61538462 0.61805556]
|
|
|
|
mean value: 0.6115940143580989
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01207209 0.01655149 0.0160923 0.01483059 0.01510477 0.0145371
|
|
0.01643419 0.01679707 0.01524067 0.01558161]
|
|
|
|
mean value: 0.01532418727874756
|
|
|
|
key: score_time
|
|
value: [0.00867105 0.01097822 0.01092243 0.01148534 0.01145959 0.01148415
|
|
0.01148462 0.01149249 0.01149011 0.0115099 ]
|
|
|
|
mean value: 0.01109778881072998
|
|
|
|
key: test_mcc
|
|
value: [0.6761234 0.60246408 0.33333333 0.57735027 0.53033009 0.45834925
|
|
0.53033009 0.58536941 0.37796447 0.58536941]
|
|
|
|
mean value: 0.5256983789695563
|
|
|
|
key: train_mcc
|
|
value: [0.75124823 0.87996919 0.77093924 0.68511879 0.79848995 0.77013788
|
|
0.74779086 0.83462233 0.60587838 0.79473968]
|
|
|
|
mean value: 0.7638934525407249
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.79166667 0.66666667 0.75 0.75 0.70833333
|
|
0.75 0.79166667 0.625 0.79166667]
|
|
|
|
mean value: 0.7458333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.87037037 0.93981481 0.875 0.81944444 0.89814815 0.88425926
|
|
0.86111111 0.91666667 0.76851852 0.89351852]
|
|
|
|
mean value: 0.8726851851851851
|
|
|
|
key: test_fscore
|
|
value: [0.84615385 0.76190476 0.66666667 0.8 0.78571429 0.63157895
|
|
0.7 0.8 0.4 0.8 ]
|
|
|
|
mean value: 0.7192018507807981
|
|
|
|
key: train_fscore
|
|
value: [0.88034188 0.94063927 0.85863874 0.84705882 0.90178571 0.88038278
|
|
0.84042553 0.91428571 0.69879518 0.9004329 ]
|
|
|
|
mean value: 0.8662786533494914
|
|
|
|
key: test_precision
|
|
value: [0.78571429 0.88888889 0.66666667 0.66666667 0.6875 0.85714286
|
|
0.875 0.76923077 1. 0.76923077]
|
|
|
|
mean value: 0.7966040903540903
|
|
|
|
key: train_precision
|
|
value: [0.81746032 0.92792793 0.98795181 0.73469388 0.87068966 0.91089109
|
|
0.9875 0.94117647 1. 0.84552846]
|
|
|
|
mean value: 0.9023819600322295
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.66666667 0.66666667 1. 0.91666667 0.5
|
|
0.58333333 0.83333333 0.25 0.83333333]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_recall
|
|
value: [0.9537037 0.9537037 0.75925926 1. 0.93518519 0.85185185
|
|
0.73148148 0.88888889 0.53703704 0.96296296]
|
|
|
|
mean value: 0.8574074074074074
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.79166667 0.66666667 0.75 0.75 0.70833333
|
|
0.75 0.79166667 0.625 0.79166667]
|
|
|
|
mean value: 0.7458333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.87037037 0.93981481 0.875 0.81944444 0.89814815 0.88425926
|
|
0.86111111 0.91666667 0.76851852 0.89351852]
|
|
|
|
mean value: 0.8726851851851851
|
|
|
|
key: test_jcc
|
|
value: [0.73333333 0.61538462 0.5 0.66666667 0.64705882 0.46153846
|
|
0.53846154 0.66666667 0.25 0.66666667]
|
|
|
|
mean value: 0.5745776772247361
|
|
|
|
key: train_jcc
|
|
value: [0.78625954 0.88793103 0.75229358 0.73469388 0.82113821 0.78632479
|
|
0.72477064 0.84210526 0.53703704 0.81889764]
|
|
|
|
mean value: 0.7691451609899106
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0141654 0.01597714 0.01418781 0.0147953 0.01495671 0.01431012
|
|
0.01488042 0.01638436 0.01453948 0.01479721]
|
|
|
|
mean value: 0.014899396896362304
|
|
|
|
key: score_time
|
|
value: [0.0115509 0.01146197 0.0115335 0.01147461 0.01151395 0.01156092
|
|
0.01154041 0.01152873 0.01148868 0.01163054]
|
|
|
|
mean value: 0.011528420448303222
|
|
|
|
key: test_mcc
|
|
value: [0.4472136 0.60246408 0.3380617 0.64168895 0.5 0.37796447
|
|
0.43033148 0.58536941 0.37796447 0.43033148]
|
|
|
|
mean value: 0.473138964023511
|
|
|
|
key: train_mcc
|
|
value: [0.4472136 0.86741806 0.68286996 0.65055321 0.86741806 0.45464702
|
|
0.83010976 0.83713046 0.53452248 0.6753356 ]
|
|
|
|
mean value: 0.684721819583476
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.79166667 0.66666667 0.79166667 0.75 0.625
|
|
0.70833333 0.79166667 0.625 0.70833333]
|
|
|
|
mean value: 0.7125
|
|
|
|
key: train_accuracy
|
|
value: [0.66666667 0.93055556 0.82407407 0.80092593 0.93055556 0.6712963
|
|
0.91203704 0.91203704 0.72222222 0.81944444]
|
|
|
|
mean value: 0.8189814814814815
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.76190476 0.63636364 0.73684211 0.75 0.4
|
|
0.66666667 0.8 0.4 0.66666667]
|
|
|
|
mean value: 0.656844383686489
|
|
|
|
key: train_fscore
|
|
value: [0.75 0.93449782 0.79120879 0.75428571 0.92610837 0.51034483
|
|
0.90640394 0.91914894 0.61538462 0.78453039]
|
|
|
|
mean value: 0.7891913403240695
|
|
|
|
key: test_precision
|
|
value: [0.6 0.88888889 0.7 1. 0.75 1.
|
|
0.77777778 0.76923077 1. 0.77777778]
|
|
|
|
mean value: 0.8263675213675213
|
|
|
|
key: train_precision
|
|
value: [0.6 0.88429752 0.97297297 0.98507463 0.98947368 1.
|
|
0.96842105 0.8503937 1. 0.97260274]
|
|
|
|
mean value: 0.9223236297855336
|
|
|
|
key: test_recall
|
|
value: [1. 0.66666667 0.58333333 0.58333333 0.75 0.25
|
|
0.58333333 0.83333333 0.25 0.58333333]
|
|
|
|
mean value: 0.6083333333333334
|
|
|
|
key: train_recall
|
|
value: [1. 0.99074074 0.66666667 0.61111111 0.87037037 0.34259259
|
|
0.85185185 1. 0.44444444 0.65740741]
|
|
|
|
mean value: 0.7435185185185185
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.79166667 0.66666667 0.79166667 0.75 0.625
|
|
0.70833333 0.79166667 0.625 0.70833333]
|
|
|
|
mean value: 0.7125
|
|
|
|
key: train_roc_auc
|
|
value: [0.66666667 0.93055556 0.82407407 0.80092593 0.93055556 0.6712963
|
|
0.91203704 0.91203704 0.72222222 0.81944444]
|
|
|
|
mean value: 0.8189814814814814
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.61538462 0.46666667 0.58333333 0.6 0.25
|
|
0.5 0.66666667 0.25 0.5 ]
|
|
|
|
mean value: 0.5032051282051282
|
|
|
|
key: train_jcc
|
|
value: [0.6 0.87704918 0.65454545 0.60550459 0.86238532 0.34259259
|
|
0.82882883 0.8503937 0.44444444 0.64545455]
|
|
|
|
mean value: 0.6711198655238018
|
|
|
|
MCC on Blind test: 0.44
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.13095427 0.11557102 0.11594152 0.11598682 0.1163609 0.11597848
|
|
0.11679888 0.11590648 0.11655402 0.11645198]
|
|
|
|
mean value: 0.1176504373550415
|
|
|
|
key: score_time
|
|
value: [0.01498008 0.01493311 0.01499248 0.01484203 0.01487803 0.01490355
|
|
0.01487446 0.0148387 0.01498008 0.0148499 ]
|
|
|
|
mean value: 0.014907240867614746
|
|
|
|
key: test_mcc
|
|
value: [0.66666667 0.57735027 0.38490018 0.58536941 0.5 0.60246408
|
|
0.6761234 0.6761234 1. 0.2508726 ]
|
|
|
|
mean value: 0.591987000896547
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.75 0.66666667 0.79166667 0.75 0.79166667
|
|
0.83333333 0.83333333 1. 0.625 ]
|
|
|
|
mean value: 0.7875
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.66666667 0.73333333 0.7826087 0.75 0.76190476
|
|
0.84615385 0.81818182 1. 0.64 ]
|
|
|
|
mean value: 0.7832182455225933
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 1. 0.61111111 0.81818182 0.75 0.88888889
|
|
0.78571429 0.9 1. 0.61538462]
|
|
|
|
mean value: 0.8202614052614052
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.5 0.91666667 0.75 0.75 0.66666667
|
|
0.91666667 0.75 1. 0.66666667]
|
|
|
|
mean value: 0.775
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.75 0.66666667 0.79166667 0.75 0.79166667
|
|
0.83333333 0.83333333 1. 0.625 ]
|
|
|
|
mean value: 0.7875
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.5 0.57894737 0.64285714 0.6 0.61538462
|
|
0.73333333 0.69230769 1. 0.47058824]
|
|
|
|
mean value: 0.6547704101883669
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04484487 0.05021477 0.06698537 0.0464921 0.05736089 0.04467273
|
|
0.04351902 0.06231117 0.06437397 0.0654459 ]
|
|
|
|
mean value: 0.05462207794189453
|
|
|
|
key: score_time
|
|
value: [0.03038597 0.02814126 0.02920938 0.03339005 0.02574015 0.02409744
|
|
0.02607036 0.02961445 0.0295229 0.02212024]
|
|
|
|
mean value: 0.0278292179107666
|
|
|
|
key: test_mcc
|
|
value: [0.6761234 0.51298918 0.60246408 0.53033009 0.60246408 0.45834925
|
|
0.50709255 0.41812101 0.91986621 0.66666667]
|
|
|
|
mean value: 0.5894466501897947
|
|
|
|
key: train_mcc
|
|
value: [0.95407186 0.95472741 0.9459053 0.96312812 0.99078321 0.97259753
|
|
0.96362411 0.98164982 0.95407186 0.98148148]
|
|
|
|
mean value: 0.9662040699138664
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.70833333 0.79166667 0.75 0.79166667 0.70833333
|
|
0.75 0.70833333 0.95833333 0.83333333]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.97685185 0.97685185 0.97222222 0.98148148 0.99537037 0.98611111
|
|
0.98148148 0.99074074 0.97685185 0.99074074]
|
|
|
|
mean value: 0.9828703703703704
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.58823529 0.81481481 0.7 0.76190476 0.63157895
|
|
0.72727273 0.69565217 0.95652174 0.83333333]
|
|
|
|
mean value: 0.7527495610037002
|
|
|
|
key: train_fscore
|
|
value: [0.97652582 0.97630332 0.97142857 0.98130841 0.99534884 0.98591549
|
|
0.98113208 0.99065421 0.97652582 0.99074074]
|
|
|
|
mean value: 0.9825883295358523
|
|
|
|
key: test_precision
|
|
value: [0.9 1. 0.73333333 0.875 0.88888889 0.85714286
|
|
0.8 0.72727273 1. 0.83333333]
|
|
|
|
mean value: 0.861497113997114
|
|
|
|
key: train_precision
|
|
value: [0.99047619 1. 1. 0.99056604 1. 1.
|
|
1. 1. 0.99047619 0.99074074]
|
|
|
|
mean value: 0.9962259159428971
|
|
|
|
key: test_recall
|
|
value: [0.75 0.41666667 0.91666667 0.58333333 0.66666667 0.5
|
|
0.66666667 0.66666667 0.91666667 0.83333333]
|
|
|
|
mean value: 0.6916666666666667
|
|
|
|
key: train_recall
|
|
value: [0.96296296 0.9537037 0.94444444 0.97222222 0.99074074 0.97222222
|
|
0.96296296 0.98148148 0.96296296 0.99074074]
|
|
|
|
mean value: 0.9694444444444444
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.70833333 0.79166667 0.75 0.79166667 0.70833333
|
|
0.75 0.70833333 0.95833333 0.83333333]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.97685185 0.97685185 0.97222222 0.98148148 0.99537037 0.98611111
|
|
0.98148148 0.99074074 0.97685185 0.99074074]
|
|
|
|
mean value: 0.9828703703703703
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.41666667 0.6875 0.53846154 0.61538462 0.46153846
|
|
0.57142857 0.53333333 0.91666667 0.71428571]
|
|
|
|
mean value: 0.614757326007326
|
|
|
|
key: train_jcc
|
|
value: [0.95412844 0.9537037 0.94444444 0.96330275 0.99074074 0.97222222
|
|
0.96296296 0.98148148 0.95412844 0.98165138]
|
|
|
|
mean value: 0.9658766564729867
|
|
|
|
MCC on Blind test: 0.44
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.11546874 0.1362741 0.08421946 0.07674527 0.0547297 0.06163383
|
|
0.07058072 0.04774594 0.06891847 0.0557971 ]
|
|
|
|
mean value: 0.07721133232116699
|
|
|
|
key: score_time
|
|
value: [0.02076721 0.03748512 0.02270293 0.01903296 0.02165675 0.02445173
|
|
0.01773858 0.02347541 0.02349663 0.02122188]
|
|
|
|
mean value: 0.023202919960021974
|
|
|
|
key: test_mcc
|
|
value: [0.66666667 0.41812101 0. 0.5 0.3380617 0.58536941
|
|
0.25819889 0.43033148 0.6761234 0.5 ]
|
|
|
|
mean value: 0.4372872557008492
|
|
|
|
key: train_mcc
|
|
value: [0.99078321 0.99078321 0.99078321 0.99078321 0.99078321 0.99078321
|
|
0.99078321 0.99078321 0.98164982 0.99078321]
|
|
|
|
mean value: 0.9898698738684077
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.70833333 0.5 0.75 0.66666667 0.79166667
|
|
0.625 0.70833333 0.83333333 0.75 ]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.99537037 0.99537037 0.99537037 0.99537037 0.99537037 0.99537037
|
|
0.99537037 0.99537037 0.99074074 0.99537037]
|
|
|
|
mean value: 0.9949074074074074
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.69565217 0.57142857 0.75 0.69230769 0.8
|
|
0.66666667 0.74074074 0.84615385 0.75 ]
|
|
|
|
mean value: 0.7346283024543894
|
|
|
|
key: train_fscore
|
|
value: [0.99539171 0.99539171 0.99539171 0.99539171 0.99539171 0.99539171
|
|
0.99539171 0.99539171 0.99082569 0.99539171]
|
|
|
|
mean value: 0.9949351033695515
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.72727273 0.5 0.75 0.64285714 0.76923077
|
|
0.6 0.66666667 0.78571429 0.75 ]
|
|
|
|
mean value: 0.7025074925074926
|
|
|
|
key: train_precision
|
|
value: [0.99082569 0.99082569 0.99082569 0.99082569 0.99082569 0.99082569
|
|
0.99082569 0.99082569 0.98181818 0.99082569]
|
|
|
|
mean value: 0.9899249374478732
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.66666667 0.66666667 0.75 0.75 0.83333333
|
|
0.75 0.83333333 0.91666667 0.75 ]
|
|
|
|
mean value: 0.775
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.70833333 0.5 0.75 0.66666667 0.79166667
|
|
0.625 0.70833333 0.83333333 0.75 ]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.99537037 0.99537037 0.99537037 0.99537037 0.99537037 0.99537037
|
|
0.99537037 0.99537037 0.99074074 0.99537037]
|
|
|
|
mean value: 0.9949074074074074
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.53333333 0.4 0.6 0.52941176 0.66666667
|
|
0.5 0.58823529 0.73333333 0.6 ]
|
|
|
|
mean value: 0.5865266106442577
|
|
|
|
key: train_jcc
|
|
value: [0.99082569 0.99082569 0.99082569 0.99082569 0.99082569 0.99082569
|
|
0.99082569 0.99082569 0.98181818 0.99082569]
|
|
|
|
mean value: 0.9899249374478732
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.39657617 0.38660288 0.38711214 0.38470411 0.39132905 0.38640523
|
|
0.3876338 0.38904047 0.39087391 0.39372563]
|
|
|
|
mean value: 0.3894003391265869
|
|
|
|
key: score_time
|
|
value: [0.00990486 0.00941515 0.00925326 0.00939202 0.0096159 0.00942993
|
|
0.00955319 0.0091846 0.00959134 0.00947499]
|
|
|
|
mean value: 0.009481525421142578
|
|
|
|
key: test_mcc
|
|
value: [0.75261781 0.45834925 0.45834925 0.91986621 0.6761234 0.6761234
|
|
0.58536941 0.66666667 0.75261781 0.33333333]
|
|
|
|
mean value: 0.6279416540619365
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.70833333 0.70833333 0.95833333 0.83333333 0.83333333
|
|
0.79166667 0.83333333 0.875 0.66666667]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88 0.63157895 0.75862069 0.96 0.84615385 0.81818182
|
|
0.7826087 0.83333333 0.88 0.66666667]
|
|
|
|
mean value: 0.8057143997011431
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.84615385 0.85714286 0.64705882 0.92307692 0.78571429 0.9
|
|
0.81818182 0.83333333 0.84615385 0.66666667]
|
|
|
|
mean value: 0.8123482399952988
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.5 0.91666667 1. 0.91666667 0.75
|
|
0.75 0.83333333 0.91666667 0.66666667]
|
|
|
|
mean value: 0.8166666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.70833333 0.70833333 0.95833333 0.83333333 0.83333333
|
|
0.79166667 0.83333333 0.875 0.66666667]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.78571429 0.46153846 0.61111111 0.92307692 0.73333333 0.69230769
|
|
0.64285714 0.71428571 0.78571429 0.5 ]
|
|
|
|
mean value: 0.684993894993895
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02040315 0.02058983 0.0212307 0.02100587 0.02085805 0.02074647
|
|
0.04295802 0.02070928 0.0207932 0.02259588]
|
|
|
|
mean value: 0.02318904399871826
|
|
|
|
key: score_time
|
|
value: [0.01255536 0.01628447 0.01540375 0.01492286 0.01830244 0.0138433
|
|
0.01333618 0.01456308 0.01523161 0.01233673]
|
|
|
|
mean value: 0.014677977561950684
|
|
|
|
key: test_mcc
|
|
value: [ 0.75261781 0.58536941 -0.0860663 0.43033148 0.2508726 0.50709255
|
|
0.25819889 0.33333333 0.43033148 0.16903085]
|
|
|
|
mean value: 0.36311121151182635
|
|
|
|
key: train_mcc
|
|
value: [1. 0.96362411 0.95472741 1. 0.86135677 0.99078321
|
|
0.92847669 0.99078321 0.90284331 0.9459053 ]
|
|
|
|
mean value: 0.9538500024738616
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.79166667 0.45833333 0.70833333 0.625 0.75
|
|
0.625 0.66666667 0.70833333 0.58333333]
|
|
|
|
mean value: 0.6791666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98148148 0.97685185 1. 0.92592593 0.99537037
|
|
0.96296296 0.99537037 0.94907407 0.97222222]
|
|
|
|
mean value: 0.9759259259259259
|
|
|
|
key: test_fscore
|
|
value: [0.88 0.7826087 0.51851852 0.74074074 0.60869565 0.76923077
|
|
0.66666667 0.66666667 0.74074074 0.61538462]
|
|
|
|
mean value: 0.6989253065774804
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98181818 0.97737557 1. 0.93103448 0.99539171
|
|
0.96428571 0.99539171 0.95154185 0.97297297]
|
|
|
|
mean value: 0.9769812177804863
|
|
|
|
key: test_precision
|
|
value: [0.84615385 0.81818182 0.46666667 0.66666667 0.63636364 0.71428571
|
|
0.6 0.66666667 0.66666667 0.57142857]
|
|
|
|
mean value: 0.6653080253080252
|
|
|
|
key: train_precision
|
|
value: [1. 0.96428571 0.95575221 1. 0.87096774 0.99082569
|
|
0.93103448 0.99082569 0.90756303 0.94736842]
|
|
|
|
mean value: 0.9558622973778704
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.75 0.58333333 0.83333333 0.58333333 0.83333333
|
|
0.75 0.66666667 0.83333333 0.66666667]
|
|
|
|
mean value: 0.7416666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.79166667 0.45833333 0.70833333 0.625 0.75
|
|
0.625 0.66666667 0.70833333 0.58333333]
|
|
|
|
mean value: 0.6791666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98148148 0.97685185 1. 0.92592593 0.99537037
|
|
0.96296296 0.99537037 0.94907407 0.97222222]
|
|
|
|
mean value: 0.975925925925926
|
|
|
|
key: test_jcc
|
|
value: [0.78571429 0.64285714 0.35 0.58823529 0.4375 0.625
|
|
0.5 0.5 0.58823529 0.44444444]
|
|
|
|
mean value: 0.5461986461251167
|
|
|
|
key: train_jcc
|
|
value: [1. 0.96428571 0.95575221 1. 0.87096774 0.99082569
|
|
0.93103448 0.99082569 0.90756303 0.94736842]
|
|
|
|
mean value: 0.9558622973778704
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01451707 0.01430178 0.01430941 0.01420307 0.0142262 0.02070832
|
|
0.04261613 0.0417614 0.04505873 0.03498101]
|
|
|
|
mean value: 0.02566831111907959
|
|
|
|
key: score_time
|
|
value: [0.01182199 0.01183081 0.01353741 0.01192141 0.01188087 0.01182985
|
|
0.0209434 0.02043557 0.02949667 0.02229738]
|
|
|
|
mean value: 0.016599535942077637
|
|
|
|
key: test_mcc
|
|
value: [0.75261781 0.60246408 0.45834925 0.41812101 0.41812101 0.58536941
|
|
0.33333333 0.5 0.91986621 0.58536941]
|
|
|
|
mean value: 0.5573611501955348
|
|
|
|
key: train_mcc
|
|
value: [0.84291786 0.88949918 0.90756304 0.87171665 0.86203543 0.87996919
|
|
0.89849486 0.89026381 0.86203543 0.90803041]
|
|
|
|
mean value: 0.8812525853248737
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.79166667 0.70833333 0.70833333 0.70833333 0.79166667
|
|
0.66666667 0.75 0.95833333 0.79166667]
|
|
|
|
mean value: 0.775
|
|
|
|
key: train_accuracy
|
|
value: [0.9212963 0.94444444 0.9537037 0.93518519 0.93055556 0.93981481
|
|
0.94907407 0.94444444 0.93055556 0.9537037 ]
|
|
|
|
mean value: 0.9402777777777778
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.76190476 0.75862069 0.72 0.72 0.7826087
|
|
0.66666667 0.75 0.96 0.7826087 ]
|
|
|
|
mean value: 0.7771974726922253
|
|
|
|
key: train_fscore
|
|
value: [0.92237443 0.94545455 0.95412844 0.93693694 0.9321267 0.94063927
|
|
0.94977169 0.94594595 0.9321267 0.95454545]
|
|
|
|
mean value: 0.9414050105042868
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_rt.py:155: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_rt.py:158: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.88888889 0.64705882 0.69230769 0.69230769 0.81818182
|
|
0.66666667 0.75 0.92307692 0.81818182]
|
|
|
|
mean value: 0.780576123223182
|
|
|
|
key: train_precision
|
|
value: [0.90990991 0.92857143 0.94545455 0.9122807 0.91150442 0.92792793
|
|
0.93693694 0.92105263 0.91150442 0.9375 ]
|
|
|
|
mean value: 0.9242642931691604
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.66666667 0.91666667 0.75 0.75 0.75
|
|
0.66666667 0.75 1. 0.75 ]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: train_recall
|
|
value: [0.93518519 0.96296296 0.96296296 0.96296296 0.9537037 0.9537037
|
|
0.96296296 0.97222222 0.9537037 0.97222222]
|
|
|
|
mean value: 0.9592592592592593
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.79166667 0.70833333 0.70833333 0.70833333 0.79166667
|
|
0.66666667 0.75 0.95833333 0.79166667]
|
|
|
|
mean value: 0.775
|
|
|
|
key: train_roc_auc
|
|
value: [0.9212963 0.94444444 0.9537037 0.93518519 0.93055556 0.93981481
|
|
0.94907407 0.94444444 0.93055556 0.9537037 ]
|
|
|
|
mean value: 0.9402777777777778
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.61538462 0.61111111 0.5625 0.5625 0.64285714
|
|
0.5 0.6 0.92307692 0.64285714]
|
|
|
|
mean value: 0.6429517704517704
|
|
|
|
key: train_jcc
|
|
value: [0.8559322 0.89655172 0.9122807 0.88135593 0.87288136 0.88793103
|
|
0.90434783 0.8974359 0.87288136 0.91304348]
|
|
|
|
mean value: 0.8894641509616427
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.12958741 0.2245791 0.22601509 0.22674584 0.22733378 0.2300446
|
|
0.22894859 0.34019399 0.22946429 0.22652197]
|
|
|
|
mean value: 0.22894346714019775
|
|
|
|
key: score_time
|
|
value: [0.03218746 0.02195168 0.0238595 0.02112699 0.02143717 0.02339196
|
|
0.02224898 0.02193046 0.02027369 0.02168155]
|
|
|
|
mean value: 0.02300894260406494
|
|
|
|
key: test_mcc
|
|
value: [0.75261781 0.6761234 0.64168895 0.58536941 0.60246408 0.60246408
|
|
0.5 0.58536941 0.75261781 0.66666667]
|
|
|
|
mean value: 0.6365381602545337
|
|
|
|
key: train_mcc
|
|
value: [0.74704394 0.80836728 0.78113347 0.75158034 0.75158034 0.76253505
|
|
0.77898084 0.77120096 0.77013788 0.75261781]
|
|
|
|
mean value: 0.7675177902273471
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.83333333 0.79166667 0.79166667 0.79166667 0.79166667
|
|
0.75 0.79166667 0.875 0.83333333]
|
|
|
|
mean value: 0.8125
|
|
|
|
key: train_accuracy
|
|
value: [0.87037037 0.90277778 0.88888889 0.875 0.875 0.87962963
|
|
0.88888889 0.88425926 0.88425926 0.875 ]
|
|
|
|
mean value: 0.8824074074074074
|
|
|
|
key: test_fscore
|
|
value: [0.88 0.81818182 0.82758621 0.8 0.81481481 0.76190476
|
|
0.75 0.8 0.88 0.83333333]
|
|
|
|
mean value: 0.816582093513128
|
|
|
|
key: train_fscore
|
|
value: [0.87826087 0.90666667 0.89380531 0.87892377 0.87892377 0.88495575
|
|
0.89189189 0.88888889 0.88789238 0.88 ]
|
|
|
|
mean value: 0.8870209289273469
|
|
|
|
key: test_precision
|
|
value: [0.84615385 0.9 0.70588235 0.76923077 0.73333333 0.88888889
|
|
0.75 0.76923077 0.84615385 0.83333333]
|
|
|
|
mean value: 0.8042207139265963
|
|
|
|
key: train_precision
|
|
value: [0.82786885 0.87179487 0.8559322 0.85217391 0.85217391 0.84745763
|
|
0.86842105 0.85470085 0.86086957 0.84615385]
|
|
|
|
mean value: 0.853754669955299
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.75 1. 0.83333333 0.91666667 0.66666667
|
|
0.75 0.83333333 0.91666667 0.83333333]
|
|
|
|
mean value: 0.8416666666666667
|
|
|
|
key: train_recall
|
|
value: [0.93518519 0.94444444 0.93518519 0.90740741 0.90740741 0.92592593
|
|
0.91666667 0.92592593 0.91666667 0.91666667]
|
|
|
|
mean value: 0.9231481481481482
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.83333333 0.79166667 0.79166667 0.79166667 0.79166667
|
|
0.75 0.79166667 0.875 0.83333333]
|
|
|
|
mean value: 0.8125
|
|
|
|
key: train_roc_auc
|
|
value: [0.87037037 0.90277778 0.88888889 0.875 0.875 0.87962963
|
|
0.88888889 0.88425926 0.88425926 0.875 ]
|
|
|
|
mean value: 0.8824074074074074
|
|
|
|
key: test_jcc
|
|
value: [0.78571429 0.69230769 0.70588235 0.66666667 0.6875 0.61538462
|
|
0.6 0.66666667 0.78571429 0.71428571]
|
|
|
|
mean value: 0.6920122279681103
|
|
|
|
key: train_jcc
|
|
value: [0.78294574 0.82926829 0.808 0.784 0.784 0.79365079
|
|
0.80487805 0.8 0.7983871 0.78571429]
|
|
|
|
mean value: 0.7970844254036796
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03095245 0.02517986 0.03043199 0.0321815 0.03195596 0.03187084
|
|
0.03063774 0.03242993 0.02801824 0.02898526]
|
|
|
|
mean value: 0.03026437759399414
|
|
|
|
key: score_time
|
|
value: [0.01542759 0.01177216 0.01163268 0.01346421 0.01373935 0.01198483
|
|
0.01169944 0.01361442 0.01173401 0.01166964]
|
|
|
|
mean value: 0.01267383098602295
|
|
|
|
key: test_mcc
|
|
value: [0.33333333 0.6761234 0.58536941 0.75261781 0.60246408 0.43033148
|
|
0.43033148 0.75261781 0.66414149 0.56490196]
|
|
|
|
mean value: 0.5792232247346277
|
|
|
|
key: train_mcc
|
|
value: [0.82419551 0.8054638 0.80659666 0.79494819 0.82275335 0.82332931
|
|
0.80659666 0.77911093 0.80493517 0.77897523]
|
|
|
|
mean value: 0.8046904809155915
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.83333333 0.79166667 0.875 0.79166667 0.70833333
|
|
0.70833333 0.875 0.82608696 0.7826087 ]
|
|
|
|
mean value: 0.7858695652173913
|
|
|
|
key: train_accuracy
|
|
value: [0.91121495 0.90186916 0.90186916 0.89719626 0.91121495 0.91121495
|
|
0.90186916 0.88785047 0.90232558 0.88837209]
|
|
|
|
mean value: 0.9014996739839165
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.81818182 0.8 0.86956522 0.81481481 0.66666667
|
|
0.74074074 0.88 0.83333333 0.8 ]
|
|
|
|
mean value: 0.7889969257795345
|
|
|
|
key: train_fscore
|
|
value: [0.91402715 0.90497738 0.9058296 0.89908257 0.9124424 0.91324201
|
|
0.9058296 0.89285714 0.90410959 0.89189189]
|
|
|
|
mean value: 0.9044289315755244
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.9 0.76923077 0.90909091 0.73333333 0.77777778
|
|
0.66666667 0.84615385 0.76923077 0.76923077]
|
|
|
|
mean value: 0.7807381507381508
|
|
|
|
key: train_precision
|
|
value: [0.88596491 0.87719298 0.87068966 0.88288288 0.9 0.89285714
|
|
0.87068966 0.85470085 0.89189189 0.86086957]
|
|
|
|
mean value: 0.8787739542631834
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.75 0.83333333 0.83333333 0.91666667 0.58333333
|
|
0.83333333 0.91666667 0.90909091 0.83333333]
|
|
|
|
mean value: 0.8075757575757576
|
|
|
|
key: train_recall
|
|
value: [0.94392523 0.93457944 0.94392523 0.91588785 0.92523364 0.93457944
|
|
0.94392523 0.93457944 0.91666667 0.92523364]
|
|
|
|
mean value: 0.9318535825545171
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.83333333 0.79166667 0.875 0.79166667 0.70833333
|
|
0.70833333 0.875 0.82954545 0.78030303]
|
|
|
|
mean value: 0.7859848484848485
|
|
|
|
key: train_roc_auc
|
|
value: [0.91121495 0.90186916 0.90186916 0.89719626 0.91121495 0.91121495
|
|
0.90186916 0.88785047 0.90225857 0.88854275]
|
|
|
|
mean value: 0.9015100380754586
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.69230769 0.66666667 0.76923077 0.6875 0.5
|
|
0.58823529 0.78571429 0.71428571 0.66666667]
|
|
|
|
mean value: 0.6570607088989442
|
|
|
|
key: train_jcc
|
|
value: [0.84166667 0.82644628 0.82786885 0.81666667 0.83898305 0.84033613
|
|
0.82786885 0.80645161 0.825 0.80487805]
|
|
|
|
mean value: 0.8256166166228054
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.8838706 0.72082353 0.72018147 0.89305925 0.71959066 0.72031975
|
|
0.79101253 0.71640468 0.70504045 0.91068506]
|
|
|
|
mean value: 0.7780987977981567
|
|
|
|
key: score_time
|
|
value: [0.01200795 0.01196694 0.01197743 0.01194954 0.01201367 0.01194072
|
|
0.01195097 0.01195168 0.01200342 0.01209617]
|
|
|
|
mean value: 0.011985850334167481
|
|
|
|
key: test_mcc
|
|
value: [0.41812101 0.66666667 0.58536941 0.84515425 0.60246408 0.43033148
|
|
0.53033009 0.75261781 0.48856385 0.56490196]
|
|
|
|
mean value: 0.5884520595841621
|
|
|
|
key: train_mcc
|
|
value: [0.76181538 0.75032247 0.77043718 0.68331814 0.68415789 0.76800037
|
|
0.76033717 0.75164603 0.70417011 0.77022946]
|
|
|
|
mean value: 0.7404434189570132
|
|
|
|
key: test_accuracy
|
|
value: [0.70833333 0.83333333 0.79166667 0.91666667 0.79166667 0.70833333
|
|
0.75 0.875 0.73913043 0.7826087 ]
|
|
|
|
mean value: 0.7896739130434782
|
|
|
|
key: train_accuracy
|
|
value: [0.87850467 0.87383178 0.88317757 0.8411215 0.8411215 0.88317757
|
|
0.87850467 0.87383178 0.85116279 0.88372093]
|
|
|
|
mean value: 0.8688154748967616
|
|
|
|
key: test_fscore
|
|
value: [0.72 0.83333333 0.8 0.92307692 0.81481481 0.66666667
|
|
0.78571429 0.88 0.75 0.8 ]
|
|
|
|
mean value: 0.7973606023606024
|
|
|
|
key: train_fscore
|
|
value: [0.88495575 0.87892377 0.88888889 0.84545455 0.84684685 0.88687783
|
|
0.88392857 0.88 0.85714286 0.88789238]
|
|
|
|
mean value: 0.8740911433526155
|
|
|
|
key: test_precision
|
|
value: [0.69230769 0.83333333 0.76923077 0.85714286 0.73333333 0.77777778
|
|
0.6875 0.84615385 0.69230769 0.76923077]
|
|
|
|
mean value: 0.7658318070818071
|
|
|
|
key: train_precision
|
|
value: [0.84033613 0.84482759 0.84745763 0.82300885 0.8173913 0.85964912
|
|
0.84615385 0.83898305 0.82758621 0.85344828]
|
|
|
|
mean value: 0.8398842004251612
|
|
|
|
key: test_recall
|
|
value: [0.75 0.83333333 0.83333333 1. 0.91666667 0.58333333
|
|
0.91666667 0.91666667 0.81818182 0.83333333]
|
|
|
|
mean value: 0.8401515151515152
|
|
|
|
key: train_recall
|
|
value: [0.93457944 0.91588785 0.93457944 0.86915888 0.87850467 0.91588785
|
|
0.92523364 0.92523364 0.88888889 0.92523364]
|
|
|
|
mean value: 0.911318795430945
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.83333333 0.79166667 0.91666667 0.79166667 0.70833333
|
|
0.75 0.875 0.74242424 0.78030303]
|
|
|
|
mean value: 0.7897727272727273
|
|
|
|
key: train_roc_auc
|
|
value: [0.87850467 0.87383178 0.88317757 0.8411215 0.8411215 0.88317757
|
|
0.87850467 0.87383178 0.8509865 0.88391312]
|
|
|
|
mean value: 0.8688170647282797
|
|
|
|
key: test_jcc
|
|
value: [0.5625 0.71428571 0.66666667 0.85714286 0.6875 0.5
|
|
0.64705882 0.78571429 0.6 0.66666667]
|
|
|
|
mean value: 0.6687535014005602
|
|
|
|
key: train_jcc
|
|
value: [0.79365079 0.784 0.8 0.73228346 0.734375 0.79674797
|
|
0.792 0.78571429 0.75 0.7983871 ]
|
|
|
|
mean value: 0.7767158608185877
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01436901 0.01197767 0.00945544 0.00957227 0.00921988 0.00907397
|
|
0.0101347 0.00925565 0.00901246 0.00902224]
|
|
|
|
mean value: 0.010109329223632812
|
|
|
|
key: score_time
|
|
value: [0.01210284 0.00923228 0.00915217 0.00906968 0.00897646 0.00888538
|
|
0.00887156 0.00883389 0.00879741 0.00943661]
|
|
|
|
mean value: 0.009335827827453614
|
|
|
|
key: test_mcc
|
|
value: [0.45834925 0.35355339 0.58536941 0.57735027 0.57735027 0.58536941
|
|
0.09166985 0.51298918 0.44411739 0.42228828]
|
|
|
|
mean value: 0.4608406691502399
|
|
|
|
key: train_mcc
|
|
value: [0.4856668 0.51639778 0.51822739 0.52240206 0.50180978 0.46360045
|
|
0.51088537 0.50987255 0.51938062 0.49694198]
|
|
|
|
mean value: 0.5045184769946628
|
|
|
|
key: test_accuracy
|
|
value: [0.70833333 0.66666667 0.79166667 0.75 0.75 0.79166667
|
|
0.54166667 0.70833333 0.69565217 0.69565217]
|
|
|
|
mean value: 0.709963768115942
|
|
|
|
key: train_accuracy
|
|
value: [0.72429907 0.72429907 0.73831776 0.74766355 0.73364486 0.72429907
|
|
0.74299065 0.73831776 0.74418605 0.73023256]
|
|
|
|
mean value: 0.7348250380352097
|
|
|
|
key: test_fscore
|
|
value: [0.75862069 0.71428571 0.8 0.8 0.8 0.7826087
|
|
0.62068966 0.77419355 0.74074074 0.75862069]
|
|
|
|
mean value: 0.7549759733548485
|
|
|
|
key: train_fscore
|
|
value: [0.76862745 0.77902622 0.78125 0.78225806 0.77470356 0.75518672
|
|
0.77732794 0.77777778 0.7826087 0.77165354]
|
|
|
|
mean value: 0.7750419963988651
|
|
|
|
key: test_precision
|
|
value: [0.64705882 0.625 0.76923077 0.66666667 0.66666667 0.81818182
|
|
0.52941176 0.63157895 0.625 0.64705882]
|
|
|
|
mean value: 0.6625854279879048
|
|
|
|
key: train_precision
|
|
value: [0.66216216 0.65 0.67114094 0.68794326 0.67123288 0.67910448
|
|
0.68571429 0.67586207 0.68275862 0.66666667]
|
|
|
|
mean value: 0.6732585360531219
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.83333333 0.83333333 1. 1. 0.75
|
|
0.75 1. 0.90909091 0.91666667]
|
|
|
|
mean value: 0.8909090909090909
|
|
|
|
key: train_recall
|
|
value: [0.91588785 0.97196262 0.93457944 0.90654206 0.91588785 0.85046729
|
|
0.89719626 0.91588785 0.91666667 0.91588785]
|
|
|
|
mean value: 0.9140965732087227
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.66666667 0.79166667 0.75 0.75 0.79166667
|
|
0.54166667 0.70833333 0.70454545 0.68560606]
|
|
|
|
mean value: 0.7098484848484848
|
|
|
|
key: train_roc_auc
|
|
value: [0.72429907 0.72429907 0.73831776 0.74766355 0.73364486 0.72429907
|
|
0.74299065 0.73831776 0.74338006 0.73109207]
|
|
|
|
mean value: 0.7348303911388023
|
|
|
|
key: test_jcc
|
|
value: [0.61111111 0.55555556 0.66666667 0.66666667 0.66666667 0.64285714
|
|
0.45 0.63157895 0.58823529 0.61111111]
|
|
|
|
mean value: 0.6090449162120989
|
|
|
|
key: train_jcc
|
|
value: [0.62420382 0.63803681 0.64102564 0.64238411 0.63225806 0.60666667
|
|
0.63576159 0.63636364 0.64285714 0.62820513]
|
|
|
|
mean value: 0.6327762606470584
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00941443 0.00931597 0.00942731 0.00927639 0.0095489 0.01009607
|
|
0.00941873 0.00929666 0.00949907 0.00945783]
|
|
|
|
mean value: 0.009475135803222656
|
|
|
|
key: score_time
|
|
value: [0.00885749 0.00908804 0.00896859 0.00891495 0.00882578 0.0092988
|
|
0.008847 0.00886583 0.00917006 0.00905085]
|
|
|
|
mean value: 0.008988738059997559
|
|
|
|
key: test_mcc
|
|
value: [0.41812101 0.16903085 0.58536941 0.58536941 0.2508726 0.58536941
|
|
0.5 0.50709255 0.76764947 0.50168817]
|
|
|
|
mean value: 0.4870562873788939
|
|
|
|
key: train_mcc
|
|
value: [0.59043763 0.63620901 0.62792574 0.61814664 0.65561007 0.62010797
|
|
0.63889912 0.63328843 0.60964859 0.64701307]
|
|
|
|
mean value: 0.6277286276584018
|
|
|
|
key: test_accuracy
|
|
value: [0.70833333 0.58333333 0.79166667 0.79166667 0.625 0.79166667
|
|
0.75 0.75 0.86956522 0.73913043]
|
|
|
|
mean value: 0.740036231884058
|
|
|
|
key: train_accuracy
|
|
value: [0.79439252 0.81775701 0.81308411 0.80841121 0.8271028 0.80841121
|
|
0.81775701 0.81308411 0.80465116 0.82325581]
|
|
|
|
mean value: 0.8127906976744186
|
|
|
|
key: test_fscore
|
|
value: [0.72 0.54545455 0.7826087 0.8 0.60869565 0.7826087
|
|
0.75 0.76923077 0.88 0.78571429]
|
|
|
|
mean value: 0.7424312643877861
|
|
|
|
key: train_fscore
|
|
value: [0.8018018 0.82191781 0.81981982 0.81447964 0.83257919 0.81777778
|
|
0.82666667 0.82608696 0.80909091 0.82568807]
|
|
|
|
mean value: 0.8195908636821799
|
|
|
|
key: test_precision
|
|
value: [0.69230769 0.6 0.81818182 0.76923077 0.63636364 0.81818182
|
|
0.75 0.71428571 0.78571429 0.6875 ]
|
|
|
|
mean value: 0.7271765734265734
|
|
|
|
key: train_precision
|
|
value: [0.77391304 0.80357143 0.79130435 0.78947368 0.80701754 0.77966102
|
|
0.78813559 0.77235772 0.79464286 0.81081081]
|
|
|
|
mean value: 0.7910888049646347
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.75 0.83333333 0.58333333 0.75
|
|
0.75 0.83333333 1. 0.91666667]
|
|
|
|
mean value: 0.7666666666666667
|
|
|
|
key: train_recall
|
|
value: [0.8317757 0.8411215 0.85046729 0.8411215 0.85981308 0.85981308
|
|
0.86915888 0.88785047 0.82407407 0.8411215 ]
|
|
|
|
mean value: 0.8506317064728279
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.58333333 0.79166667 0.79166667 0.625 0.79166667
|
|
0.75 0.75 0.875 0.73106061]
|
|
|
|
mean value: 0.7397727272727274
|
|
|
|
key: train_roc_auc
|
|
value: [0.79439252 0.81775701 0.81308411 0.80841121 0.8271028 0.80841121
|
|
0.81775701 0.81308411 0.8045604 0.82333853]
|
|
|
|
mean value: 0.8127898926964348
|
|
|
|
key: test_jcc
|
|
value: [0.5625 0.375 0.64285714 0.66666667 0.4375 0.64285714
|
|
0.6 0.625 0.78571429 0.64705882]
|
|
|
|
mean value: 0.598515406162465
|
|
|
|
key: train_jcc
|
|
value: [0.66917293 0.69767442 0.69465649 0.6870229 0.71317829 0.69172932
|
|
0.70454545 0.7037037 0.67938931 0.703125 ]
|
|
|
|
mean value: 0.6944197829356628
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00901866 0.00923586 0.00900841 0.00987458 0.00992274 0.00976443
|
|
0.00986457 0.01014185 0.00996709 0.0086441 ]
|
|
|
|
mean value: 0.00954422950744629
|
|
|
|
key: score_time
|
|
value: [0.01099586 0.01051021 0.01014256 0.01094532 0.01093531 0.01087236
|
|
0.0109477 0.01107359 0.01543617 0.01557159]
|
|
|
|
mean value: 0.01174306869506836
|
|
|
|
key: test_mcc
|
|
value: [0.25819889 0.58536941 0.3380617 0.16903085 0.16903085 0.60246408
|
|
0.25819889 0.5 0.31298622 0.12878788]
|
|
|
|
mean value: 0.33221287636298225
|
|
|
|
key: train_mcc
|
|
value: [0.62297427 0.63014358 0.61814664 0.63889912 0.57503685 0.59389052
|
|
0.6377741 0.59252307 0.63165773 0.62203998]
|
|
|
|
mean value: 0.6163085857672724
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.79166667 0.66666667 0.58333333 0.58333333 0.79166667
|
|
0.625 0.75 0.65217391 0.56521739]
|
|
|
|
mean value: 0.6634057971014493
|
|
|
|
key: train_accuracy
|
|
value: [0.80841121 0.81308411 0.80841121 0.81775701 0.78504673 0.79439252
|
|
0.81775701 0.79439252 0.81395349 0.80930233]
|
|
|
|
mean value: 0.8062508150402087
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.8 0.69230769 0.61538462 0.61538462 0.76190476
|
|
0.66666667 0.75 0.66666667 0.58333333]
|
|
|
|
mean value: 0.6818315018315018
|
|
|
|
key: train_fscore
|
|
value: [0.8209607 0.82300885 0.81447964 0.82666667 0.79824561 0.80701754
|
|
0.82511211 0.80530973 0.8245614 0.81777778]
|
|
|
|
mean value: 0.8163140034241074
|
|
|
|
key: test_precision
|
|
value: [0.6 0.76923077 0.64285714 0.57142857 0.57142857 0.88888889
|
|
0.6 0.75 0.61538462 0.58333333]
|
|
|
|
mean value: 0.6592551892551892
|
|
|
|
key: train_precision
|
|
value: [0.7704918 0.78151261 0.78947368 0.78813559 0.75206612 0.76033058
|
|
0.79310345 0.76470588 0.78333333 0.77966102]
|
|
|
|
mean value: 0.7762814060877736
|
|
|
|
key: test_recall
|
|
value: [0.75 0.83333333 0.75 0.66666667 0.66666667 0.66666667
|
|
0.75 0.75 0.72727273 0.58333333]
|
|
|
|
mean value: 0.7143939393939394
|
|
|
|
key: train_recall
|
|
value: [0.87850467 0.86915888 0.8411215 0.86915888 0.85046729 0.85981308
|
|
0.85981308 0.85046729 0.87037037 0.85981308]
|
|
|
|
mean value: 0.8608688127379716
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.79166667 0.66666667 0.58333333 0.58333333 0.79166667
|
|
0.625 0.75 0.65530303 0.56439394]
|
|
|
|
mean value: 0.6636363636363636
|
|
|
|
key: train_roc_auc
|
|
value: [0.80841121 0.81308411 0.80841121 0.81775701 0.78504673 0.79439252
|
|
0.81775701 0.79439252 0.81368986 0.80953617]
|
|
|
|
mean value: 0.8062478366216683
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.66666667 0.52941176 0.44444444 0.44444444 0.61538462
|
|
0.5 0.6 0.5 0.41176471]
|
|
|
|
mean value: 0.5212116641528406
|
|
|
|
key: train_jcc
|
|
value: [0.6962963 0.69924812 0.6870229 0.70454545 0.66423358 0.67647059
|
|
0.70229008 0.67407407 0.70149254 0.69172932]
|
|
|
|
mean value: 0.6897402947815147
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01391244 0.01355982 0.01375175 0.01338625 0.01189446 0.01187778
|
|
0.01245689 0.0119462 0.01181173 0.0138557 ]
|
|
|
|
mean value: 0.012845301628112793
|
|
|
|
key: score_time
|
|
value: [0.01098967 0.01039457 0.0104115 0.01010585 0.01013899 0.00966024
|
|
0.00945163 0.00955868 0.00956511 0.0103538 ]
|
|
|
|
mean value: 0.01006300449371338
|
|
|
|
key: test_mcc
|
|
value: [0.50709255 0.58536941 0.6761234 0.58536941 0.60246408 0.58536941
|
|
0.43033148 0.6761234 0.65151515 0.50168817]
|
|
|
|
mean value: 0.5801446459328247
|
|
|
|
key: train_mcc
|
|
value: [0.78691547 0.78452148 0.79943589 0.77911093 0.82419551 0.79943589
|
|
0.80801948 0.77399833 0.81614982 0.77323619]
|
|
|
|
mean value: 0.7945018986838522
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.79166667 0.83333333 0.79166667 0.79166667 0.79166667
|
|
0.70833333 0.83333333 0.82608696 0.73913043]
|
|
|
|
mean value: 0.7856884057971014
|
|
|
|
key: train_accuracy
|
|
value: [0.88785047 0.88785047 0.89719626 0.88785047 0.91121495 0.89719626
|
|
0.90186916 0.88317757 0.90697674 0.88372093]
|
|
|
|
mean value: 0.894490328189524
|
|
|
|
key: test_fscore
|
|
value: [0.76923077 0.8 0.84615385 0.7826087 0.81481481 0.7826087
|
|
0.74074074 0.84615385 0.81818182 0.78571429]
|
|
|
|
mean value: 0.7986207512294469
|
|
|
|
key: train_fscore
|
|
value: [0.89655172 0.89565217 0.90265487 0.89285714 0.91402715 0.90265487
|
|
0.90666667 0.89082969 0.91071429 0.88986784]
|
|
|
|
mean value: 0.9002476412856446
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.76923077 0.78571429 0.81818182 0.73333333 0.81818182
|
|
0.66666667 0.78571429 0.81818182 0.6875 ]
|
|
|
|
mean value: 0.759699050949051
|
|
|
|
key: train_precision
|
|
value: [0.832 0.83739837 0.85714286 0.85470085 0.88596491 0.85714286
|
|
0.86440678 0.83606557 0.87931034 0.84166667]
|
|
|
|
mean value: 0.8545799220176772
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.83333333 0.91666667 0.75 0.91666667 0.75
|
|
0.83333333 0.91666667 0.81818182 0.91666667]
|
|
|
|
mean value: 0.8484848484848485
|
|
|
|
key: train_recall
|
|
value: [0.97196262 0.96261682 0.95327103 0.93457944 0.94392523 0.95327103
|
|
0.95327103 0.95327103 0.94444444 0.94392523]
|
|
|
|
mean value: 0.9514537902388369
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.79166667 0.83333333 0.79166667 0.79166667 0.79166667
|
|
0.70833333 0.83333333 0.82575758 0.73106061]
|
|
|
|
mean value: 0.7848484848484849
|
|
|
|
key: train_roc_auc
|
|
value: [0.88785047 0.88785047 0.89719626 0.88785047 0.91121495 0.89719626
|
|
0.90186916 0.88317757 0.90680166 0.88399965]
|
|
|
|
mean value: 0.8945006922810661
|
|
|
|
key: test_jcc
|
|
value: [0.625 0.66666667 0.73333333 0.64285714 0.6875 0.64285714
|
|
0.58823529 0.73333333 0.69230769 0.64705882]
|
|
|
|
mean value: 0.665914942900237
|
|
|
|
key: train_jcc
|
|
value: [0.8125 0.81102362 0.82258065 0.80645161 0.84166667 0.82258065
|
|
0.82926829 0.80314961 0.83606557 0.8015873 ]
|
|
|
|
mean value: 0.818687396627965
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.98078847 1.14474893 1.03964257 1.1282227 1.03296232 0.8334837
|
|
0.89926815 1.07796717 0.77976942 0.66169286]
|
|
|
|
mean value: 0.9578546285629272
|
|
|
|
key: score_time
|
|
value: [0.01240253 0.01251841 0.01240301 0.01245475 0.01492858 0.01274514
|
|
0.01272035 0.01467299 0.01266956 0.01264405]
|
|
|
|
mean value: 0.013015937805175782
|
|
|
|
key: test_mcc
|
|
value: [0.2508726 0.53033009 0.53033009 0.66666667 0.50709255 0.50709255
|
|
0.45834925 0.83333333 0.66414149 0.56818182]
|
|
|
|
mean value: 0.5516390435152891
|
|
|
|
key: train_mcc
|
|
value: [0.95431352 0.98130841 0.91914503 0.93560149 0.95331266 0.8880056
|
|
0.9178541 0.96278502 0.87999381 0.89803517]
|
|
|
|
mean value: 0.9290354812196953
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.75 0.75 0.83333333 0.75 0.75
|
|
0.70833333 0.91666667 0.82608696 0.7826087 ]
|
|
|
|
mean value: 0.7692028985507247
|
|
|
|
key: train_accuracy
|
|
value: [0.97663551 0.99065421 0.95794393 0.96728972 0.97663551 0.94392523
|
|
0.95794393 0.98130841 0.93953488 0.94883721]
|
|
|
|
mean value: 0.9640708541621387
|
|
|
|
key: test_fscore
|
|
value: [0.64 0.7 0.78571429 0.83333333 0.76923077 0.72727273
|
|
0.75862069 0.91666667 0.83333333 0.7826087 ]
|
|
|
|
mean value: 0.7746780500858461
|
|
|
|
key: train_fscore
|
|
value: [0.97716895 0.99065421 0.95964126 0.96803653 0.97674419 0.94444444
|
|
0.95927602 0.98113208 0.94117647 0.94930876]
|
|
|
|
mean value: 0.9647582891075718
|
|
|
|
key: test_precision
|
|
value: [0.61538462 0.875 0.6875 0.83333333 0.71428571 0.8
|
|
0.64705882 0.91666667 0.76923077 0.81818182]
|
|
|
|
mean value: 0.7676641740612329
|
|
|
|
key: train_precision
|
|
value: [0.95535714 0.99065421 0.92241379 0.94642857 0.97222222 0.93577982
|
|
0.92982456 0.99047619 0.92035398 0.93636364]
|
|
|
|
mean value: 0.9499874122276843
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.58333333 0.91666667 0.83333333 0.83333333 0.66666667
|
|
0.91666667 0.91666667 0.90909091 0.75 ]
|
|
|
|
mean value: 0.7992424242424242
|
|
|
|
key: train_recall
|
|
value: [1. 0.99065421 1. 0.99065421 0.98130841 0.95327103
|
|
0.99065421 0.97196262 0.96296296 0.96261682]
|
|
|
|
mean value: 0.9804084458290065
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.75 0.75 0.83333333 0.75 0.75
|
|
0.70833333 0.91666667 0.82954545 0.78409091]
|
|
|
|
mean value: 0.7696969696969697
|
|
|
|
key: train_roc_auc
|
|
value: [0.97663551 0.99065421 0.95794393 0.96728972 0.97663551 0.94392523
|
|
0.95794393 0.98130841 0.93942541 0.948901 ]
|
|
|
|
mean value: 0.9640662859120803
|
|
|
|
key: test_jcc
|
|
value: [0.47058824 0.53846154 0.64705882 0.71428571 0.625 0.57142857
|
|
0.61111111 0.84615385 0.71428571 0.64285714]
|
|
|
|
mean value: 0.6381230697407168
|
|
|
|
key: train_jcc
|
|
value: [0.95535714 0.98148148 0.92241379 0.9380531 0.95454545 0.89473684
|
|
0.92173913 0.96296296 0.88888889 0.90350877]
|
|
|
|
mean value: 0.9323687565654382
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02077341 0.01784921 0.01781964 0.01727605 0.0172956 0.01682329
|
|
0.01593351 0.01639438 0.01650286 0.01686263]
|
|
|
|
mean value: 0.017353057861328125
|
|
|
|
key: score_time
|
|
value: [0.01231551 0.00951004 0.00973606 0.00914216 0.0093534 0.00895286
|
|
0.00935936 0.00957298 0.00883436 0.00960231]
|
|
|
|
mean value: 0.009637904167175294
|
|
|
|
key: test_mcc
|
|
value: [0.16666667 0.43033148 0.6761234 0.5 0.41812101 0.53033009
|
|
0.50709255 0.25819889 0.31298622 0.56490196]
|
|
|
|
mean value: 0.4364752260633302
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.58333333 0.70833333 0.83333333 0.75 0.70833333 0.75
|
|
0.75 0.625 0.65217391 0.7826087 ]
|
|
|
|
mean value: 0.7143115942028986
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.58333333 0.66666667 0.84615385 0.75 0.72 0.7
|
|
0.76923077 0.66666667 0.66666667 0.8 ]
|
|
|
|
mean value: 0.7168717948717949
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.58333333 0.77777778 0.78571429 0.75 0.69230769 0.875
|
|
0.71428571 0.6 0.61538462 0.76923077]
|
|
|
|
mean value: 0.7163034188034189
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.58333333 0.58333333 0.91666667 0.75 0.75 0.58333333
|
|
0.83333333 0.75 0.72727273 0.83333333]
|
|
|
|
mean value: 0.7310606060606061
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.58333333 0.70833333 0.83333333 0.75 0.70833333 0.75
|
|
0.75 0.625 0.65530303 0.78030303]
|
|
|
|
mean value: 0.7143939393939395
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.41176471 0.5 0.73333333 0.6 0.5625 0.53846154
|
|
0.625 0.5 0.5 0.66666667]
|
|
|
|
mean value: 0.5637726244343891
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09941649 0.10254478 0.10431123 0.10478163 0.10160041 0.10014677
|
|
0.09986472 0.09947181 0.10004663 0.10172796]
|
|
|
|
mean value: 0.10139124393463135
|
|
|
|
key: score_time
|
|
value: [0.01773238 0.0191679 0.01867318 0.01925397 0.01925659 0.01850605
|
|
0.01875663 0.01869678 0.01848745 0.01848054]
|
|
|
|
mean value: 0.01870114803314209
|
|
|
|
key: test_mcc
|
|
value: [0.2508726 0.43033148 0.58536941 0.58536941 0.2508726 0.53033009
|
|
0.43033148 0.6761234 0.76764947 0.48075018]
|
|
|
|
mean value: 0.49880001244620575
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.70833333 0.79166667 0.79166667 0.625 0.75
|
|
0.70833333 0.83333333 0.86956522 0.73913043]
|
|
|
|
mean value: 0.7442028985507246
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.64 0.66666667 0.8 0.7826087 0.64 0.7
|
|
0.74074074 0.84615385 0.88 0.76923077]
|
|
|
|
mean value: 0.7465400718444197
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.61538462 0.77777778 0.76923077 0.81818182 0.61538462 0.875
|
|
0.66666667 0.78571429 0.78571429 0.71428571]
|
|
|
|
mean value: 0.7423340548340549
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.58333333 0.83333333 0.75 0.66666667 0.58333333
|
|
0.83333333 0.91666667 1. 0.83333333]
|
|
|
|
mean value: 0.7666666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.70833333 0.79166667 0.79166667 0.625 0.75
|
|
0.70833333 0.83333333 0.875 0.73484848]
|
|
|
|
mean value: 0.7443181818181819
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.47058824 0.5 0.66666667 0.64285714 0.47058824 0.53846154
|
|
0.58823529 0.73333333 0.78571429 0.625 ]
|
|
|
|
mean value: 0.602144473173885
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.2
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01040649 0.01031017 0.01037312 0.00961804 0.01037526 0.01037502
|
|
0.01032877 0.01025653 0.0104363 0.00932646]
|
|
|
|
mean value: 0.01018061637878418
|
|
|
|
key: score_time
|
|
value: [0.00964999 0.00946546 0.00953627 0.00941133 0.00960279 0.00949907
|
|
0.00953937 0.00953007 0.00954676 0.0087235 ]
|
|
|
|
mean value: 0.009450459480285644
|
|
|
|
key: test_mcc
|
|
value: [0.16666667 0.2508726 0.27500955 0. 0.27500955 0.50709255
|
|
0.2508726 0.43033148 0.50460839 0.12878788]
|
|
|
|
mean value: 0.2789251277774354
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.58333333 0.625 0.625 0.5 0.625 0.75
|
|
0.625 0.70833333 0.69565217 0.56521739]
|
|
|
|
mean value: 0.6302536231884058
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.58333333 0.60869565 0.68965517 0.45454545 0.68965517 0.72727273
|
|
0.64 0.74074074 0.75862069 0.58333333]
|
|
|
|
mean value: 0.6475852275882261
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.58333333 0.63636364 0.58823529 0.5 0.58823529 0.8
|
|
0.61538462 0.66666667 0.61111111 0.58333333]
|
|
|
|
mean value: 0.617266328442799
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.58333333 0.58333333 0.83333333 0.41666667 0.83333333 0.66666667
|
|
0.66666667 0.83333333 1. 0.58333333]
|
|
|
|
mean value: 0.7000000000000001
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.58333333 0.625 0.625 0.5 0.625 0.75
|
|
0.625 0.70833333 0.70833333 0.56439394]
|
|
|
|
mean value: 0.631439393939394
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.41176471 0.4375 0.52631579 0.29411765 0.52631579 0.57142857
|
|
0.47058824 0.58823529 0.61111111 0.41176471]
|
|
|
|
mean value: 0.4849141849722345
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.01
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.35469699 1.33344936 1.3990047 1.37505436 1.3341949 1.35325098
|
|
1.33077955 1.33756208 1.384763 1.34119081]
|
|
|
|
mean value: 1.3543946743011475
|
|
|
|
key: score_time
|
|
value: [0.09228492 0.0974164 0.09831905 0.09852076 0.0970428 0.09776616
|
|
0.09751773 0.09144068 0.09766102 0.09835434]
|
|
|
|
mean value: 0.09663238525390624
|
|
|
|
key: test_mcc
|
|
value: [0.3380617 0.35355339 0.6761234 0.66666667 0.50709255 0.64168895
|
|
0.50709255 0.83333333 0.56818182 0.38932432]
|
|
|
|
mean value: 0.5481118688595472
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.66666667 0.83333333 0.83333333 0.75 0.79166667
|
|
0.75 0.91666667 0.7826087 0.69565217]
|
|
|
|
mean value: 0.7686594202898551
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.69230769 0.6 0.84615385 0.83333333 0.76923077 0.73684211
|
|
0.76923077 0.91666667 0.7826087 0.72 ]
|
|
|
|
mean value: 0.7666373877838408
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.64285714 0.75 0.78571429 0.83333333 0.71428571 1.
|
|
0.71428571 0.91666667 0.75 0.69230769]
|
|
|
|
mean value: 0.779945054945055
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.91666667 0.83333333 0.83333333 0.58333333
|
|
0.83333333 0.91666667 0.81818182 0.75 ]
|
|
|
|
mean value: 0.7734848484848484
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.66666667 0.83333333 0.83333333 0.75 0.79166667
|
|
0.75 0.91666667 0.78409091 0.69318182]
|
|
|
|
mean value: 0.768560606060606
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.52941176 0.42857143 0.73333333 0.71428571 0.625 0.58333333
|
|
0.625 0.84615385 0.64285714 0.5625 ]
|
|
|
|
mean value: 0.6290446563240681
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.82288122 0.93520522 0.86855769 0.89258552 0.9448607 0.90758777
|
|
0.90327692 0.97866297 0.88730645 0.90872741]
|
|
|
|
mean value: 0.9049651861190796
|
|
|
|
key: score_time
|
|
value: [0.22578979 0.19590878 0.17380428 0.18248248 0.17389655 0.20093942
|
|
0.26056457 0.23904443 0.20164275 0.25702143]
|
|
|
|
mean value: 0.21110944747924804
|
|
|
|
key: test_mcc
|
|
value: [0.41812101 0.35355339 0.60246408 0.83333333 0.43033148 0.64168895
|
|
0.41812101 0.83333333 0.65151515 0.48075018]
|
|
|
|
mean value: 0.5663211901063119
|
|
|
|
key: train_mcc
|
|
value: [0.93494699 0.90670046 0.90670046 0.90717617 0.91592785 0.89754911
|
|
0.89754911 0.92539531 0.89800878 0.91632053]
|
|
|
|
mean value: 0.9106274768697769
|
|
|
|
key: test_accuracy
|
|
value: [0.70833333 0.66666667 0.79166667 0.91666667 0.70833333 0.79166667
|
|
0.70833333 0.91666667 0.82608696 0.73913043]
|
|
|
|
mean value: 0.7773550724637681
|
|
|
|
key: train_accuracy
|
|
value: [0.96728972 0.95327103 0.95327103 0.95327103 0.95794393 0.94859813
|
|
0.94859813 0.96261682 0.94883721 0.95813953]
|
|
|
|
mean value: 0.9551836557270159
|
|
|
|
key: test_fscore
|
|
value: [0.69565217 0.6 0.81481481 0.91666667 0.74074074 0.73684211
|
|
0.72 0.91666667 0.81818182 0.76923077]
|
|
|
|
mean value: 0.7728795755477678
|
|
|
|
key: train_fscore
|
|
value: [0.96774194 0.9537037 0.9537037 0.95412844 0.95813953 0.94930876
|
|
0.94930876 0.96296296 0.94977169 0.95813953]
|
|
|
|
mean value: 0.955690901700711
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.75 0.73333333 0.91666667 0.66666667 1.
|
|
0.69230769 0.91666667 0.81818182 0.71428571]
|
|
|
|
mean value: 0.7935381285381286
|
|
|
|
key: train_precision /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
value: [0.95454545 0.94495413 0.94495413 0.93693694 0.9537037 0.93636364
|
|
0.93636364 0.95412844 0.93693694 0.9537037 ]
|
|
|
|
mean value: 0.9452590705801716
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.5 0.91666667 0.91666667 0.83333333 0.58333333
|
|
0.75 0.91666667 0.81818182 0.83333333]
|
|
|
|
mean value: 0.7734848484848484
|
|
|
|
key: train_recall
|
|
value: [0.98130841 0.96261682 0.96261682 0.97196262 0.96261682 0.96261682
|
|
0.96261682 0.97196262 0.96296296 0.96261682]
|
|
|
|
mean value: 0.9663897542402216
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.66666667 0.79166667 0.91666667 0.70833333 0.79166667
|
|
0.70833333 0.91666667 0.82575758 0.73484848]
|
|
|
|
mean value: 0.7768939393939394
|
|
|
|
key: train_roc_auc
|
|
value: [0.96728972 0.95327103 0.95327103 0.95327103 0.95794393 0.94859813
|
|
0.94859813 0.96261682 0.9487712 0.95816026]
|
|
|
|
mean value: 0.9551791277258567
|
|
|
|
key: test_jcc
|
|
value: [0.53333333 0.42857143 0.6875 0.84615385 0.58823529 0.58333333
|
|
0.5625 0.84615385 0.69230769 0.625 ]
|
|
|
|
mean value: 0.6393088773971127
|
|
|
|
key: train_jcc
|
|
value: [0.9375 0.91150442 0.91150442 0.9122807 0.91964286 0.90350877
|
|
0.90350877 0.92857143 0.90434783 0.91964286]
|
|
|
|
mean value: 0.9152012064115657
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02384639 0.00904751 0.00906181 0.00914288 0.0090673 0.00917602
|
|
0.0091517 0.01172256 0.01012325 0.00911999]
|
|
|
|
mean value: 0.010945940017700195
|
|
|
|
key: score_time
|
|
value: [0.01334524 0.00935245 0.00869036 0.00860286 0.00860476 0.00863767
|
|
0.00859475 0.00944138 0.00946784 0.00863409]
|
|
|
|
mean value: 0.009337139129638673
|
|
|
|
key: test_mcc
|
|
value: [0.41812101 0.16903085 0.58536941 0.58536941 0.2508726 0.58536941
|
|
0.5 0.50709255 0.76764947 0.50168817]
|
|
|
|
mean value: 0.4870562873788939
|
|
|
|
key: train_mcc
|
|
value: [0.59043763 0.63620901 0.62792574 0.61814664 0.65561007 0.62010797
|
|
0.63889912 0.63328843 0.60964859 0.64701307]
|
|
|
|
mean value: 0.6277286276584018
|
|
|
|
key: test_accuracy
|
|
value: [0.70833333 0.58333333 0.79166667 0.79166667 0.625 0.79166667
|
|
0.75 0.75 0.86956522 0.73913043]
|
|
|
|
mean value: 0.740036231884058
|
|
|
|
key: train_accuracy
|
|
value: [0.79439252 0.81775701 0.81308411 0.80841121 0.8271028 0.80841121
|
|
0.81775701 0.81308411 0.80465116 0.82325581]
|
|
|
|
mean value: 0.8127906976744186
|
|
|
|
key: test_fscore
|
|
value: [0.72 0.54545455 0.7826087 0.8 0.60869565 0.7826087
|
|
0.75 0.76923077 0.88 0.78571429]
|
|
|
|
mean value: 0.7424312643877861
|
|
|
|
key: train_fscore
|
|
value: [0.8018018 0.82191781 0.81981982 0.81447964 0.83257919 0.81777778
|
|
0.82666667 0.82608696 0.80909091 0.82568807]
|
|
|
|
mean value: 0.8195908636821799
|
|
|
|
key: test_precision
|
|
value: [0.69230769 0.6 0.81818182 0.76923077 0.63636364 0.81818182
|
|
0.75 0.71428571 0.78571429 0.6875 ]
|
|
|
|
mean value: 0.7271765734265734
|
|
|
|
key: train_precision
|
|
value: [0.77391304 0.80357143 0.79130435 0.78947368 0.80701754 0.77966102
|
|
0.78813559 0.77235772 0.79464286 0.81081081]
|
|
|
|
mean value: 0.7910888049646347
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.75 0.83333333 0.58333333 0.75
|
|
0.75 0.83333333 1. 0.91666667]
|
|
|
|
mean value: 0.7666666666666667
|
|
|
|
key: train_recall
|
|
value: [0.8317757 0.8411215 0.85046729 0.8411215 0.85981308 0.85981308
|
|
0.86915888 0.88785047 0.82407407 0.8411215 ]
|
|
|
|
mean value: 0.8506317064728279
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.58333333 0.79166667 0.79166667 0.625 0.79166667
|
|
0.75 0.75 0.875 0.73106061]
|
|
|
|
mean value: 0.7397727272727274
|
|
|
|
key: train_roc_auc
|
|
value: [0.79439252 0.81775701 0.81308411 0.80841121 0.8271028 0.80841121
|
|
0.81775701 0.81308411 0.8045604 0.82333853]
|
|
|
|
mean value: 0.8127898926964348
|
|
|
|
key: test_jcc
|
|
value: [0.5625 0.375 0.64285714 0.66666667 0.4375 0.64285714
|
|
0.6 0.625 0.78571429 0.64705882]
|
|
|
|
mean value: 0.598515406162465
|
|
|
|
key: train_jcc
|
|
value: [0.66917293 0.69767442 0.69465649 0.6870229 0.71317829 0.69172932
|
|
0.70454545 0.7037037 0.67938931 0.703125 ]
|
|
|
|
mean value: 0.6944197829356628
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.08058214 0.06786609 0.06809545 0.22957754 0.06823206 0.06266332
|
|
0.06188226 0.06003165 0.06411195 0.06521845]
|
|
|
|
mean value: 0.08282608985900879
|
|
|
|
key: score_time
|
|
value: [0.01041341 0.01041722 0.01038694 0.01292777 0.01117945 0.01045275
|
|
0.01058483 0.01038098 0.01039505 0.01027608]
|
|
|
|
mean value: 0.010741448402404786
|
|
|
|
key: test_mcc
|
|
value: [0.50709255 0.45834925 0.75261781 0.83333333 0.66666667 0.70710678
|
|
0.50709255 0.75261781 0.83971912 0.38932432]
|
|
|
|
mean value: 0.6413920196699461
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.70833333 0.875 0.91666667 0.83333333 0.83333333
|
|
0.75 0.875 0.91304348 0.69565217]
|
|
|
|
mean value: 0.8150362318840579
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.76923077 0.63157895 0.88 0.91666667 0.83333333 0.8
|
|
0.76923077 0.86956522 0.91666667 0.72 ]
|
|
|
|
mean value: 0.810627236988793
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.85714286 0.84615385 0.91666667 0.83333333 1.
|
|
0.71428571 0.90909091 0.84615385 0.69230769]
|
|
|
|
mean value: 0.8329420579420579
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.5 0.91666667 0.91666667 0.83333333 0.66666667
|
|
0.83333333 0.83333333 1. 0.75 ]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.70833333 0.875 0.91666667 0.83333333 0.83333333
|
|
0.75 0.875 0.91666667 0.69318182]
|
|
|
|
mean value: 0.8151515151515152
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.625 0.46153846 0.78571429 0.84615385 0.71428571 0.66666667
|
|
0.625 0.76923077 0.84615385 0.5625 ]
|
|
|
|
mean value: 0.690224358974359
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02749777 0.02816367 0.02974796 0.05736232 0.0272913 0.04481721
|
|
0.02785373 0.05785823 0.05839539 0.06007934]
|
|
|
|
mean value: 0.04190669059753418
|
|
|
|
key: score_time
|
|
value: [0.01219177 0.01208019 0.01197457 0.01192737 0.01195431 0.01191282
|
|
0.01195431 0.02337527 0.02111554 0.02088499]
|
|
|
|
mean value: 0.014937114715576173
|
|
|
|
key: test_mcc
|
|
value: [0.35355339 0.66666667 0.2508726 0.41812101 0.16666667 0.50709255
|
|
0.50709255 0.5 0.66414149 0.47727273]
|
|
|
|
mean value: 0.4511479652880026
|
|
|
|
key: train_mcc
|
|
value: [0.94458549 0.95364593 0.95331266 0.95331266 0.97200507 0.94392523
|
|
0.94409017 0.97234487 0.95385294 0.96295976]
|
|
|
|
mean value: 0.9554034791393476
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.83333333 0.625 0.70833333 0.58333333 0.75
|
|
0.75 0.75 0.82608696 0.73913043]
|
|
|
|
mean value: 0.7231884057971014
|
|
|
|
key: train_accuracy
|
|
value: [0.97196262 0.97663551 0.97663551 0.97663551 0.98598131 0.97196262
|
|
0.97196262 0.98598131 0.97674419 0.98139535]
|
|
|
|
mean value: 0.9775896544229515
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.83333333 0.64 0.69565217 0.58333333 0.76923077
|
|
0.76923077 0.75 0.83333333 0.75 ]
|
|
|
|
mean value: 0.7338399426660296
|
|
|
|
key: train_fscore
|
|
value: [0.97247706 0.97630332 0.97652582 0.97652582 0.98604651 0.97196262
|
|
0.97222222 0.98617512 0.97716895 0.98148148]
|
|
|
|
mean value: 0.977688892208132
|
|
|
|
key: test_precision
|
|
value: [0.625 0.83333333 0.61538462 0.72727273 0.58333333 0.71428571
|
|
0.71428571 0.75 0.76923077 0.75 ]
|
|
|
|
mean value: 0.7082126207126207
|
|
|
|
key: train_precision
|
|
value: [0.95495495 0.99038462 0.98113208 0.98113208 0.98148148 0.97196262
|
|
0.96330275 0.97272727 0.96396396 0.97247706]
|
|
|
|
mean value: 0.9733518872791876
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.83333333 0.66666667 0.66666667 0.58333333 0.83333333
|
|
0.83333333 0.75 0.90909091 0.75 ]
|
|
|
|
mean value: 0.7659090909090909
|
|
|
|
key: train_recall
|
|
value: [0.99065421 0.96261682 0.97196262 0.97196262 0.99065421 0.97196262
|
|
0.98130841 1. 0.99074074 0.99065421]
|
|
|
|
mean value: 0.982251644167532
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.83333333 0.625 0.70833333 0.58333333 0.75
|
|
0.75 0.75 0.82954545 0.73863636]
|
|
|
|
mean value: 0.7234848484848485
|
|
|
|
key: train_roc_auc
|
|
value: [0.97196262 0.97663551 0.97663551 0.97663551 0.98598131 0.97196262
|
|
0.97196262 0.98598131 0.97667878 0.98143821]
|
|
|
|
mean value: 0.9775874004845967
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.71428571 0.47058824 0.53333333 0.41176471 0.625
|
|
0.625 0.6 0.71428571 0.6 ]
|
|
|
|
mean value: 0.5849813258636788
|
|
|
|
key: train_jcc
|
|
value: [0.94642857 0.9537037 0.95412844 0.95412844 0.97247706 0.94545455
|
|
0.94594595 0.97272727 0.95535714 0.96363636]
|
|
|
|
mean value: 0.9563987490707675
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02090168 0.00918961 0.00905538 0.00885677 0.00880861 0.00887942
|
|
0.00888872 0.00904346 0.00889969 0.00889063]
|
|
|
|
mean value: 0.010141396522521972
|
|
|
|
key: score_time
|
|
value: [0.00995135 0.00894046 0.00866508 0.00853658 0.00849724 0.00857306
|
|
0.00858712 0.00854015 0.00854492 0.00852847]
|
|
|
|
mean value: 0.008736443519592286
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.25819889 0.60246408 0.35355339 0.43033148 0.58536941
|
|
0.16903085 0.53033009 0.31298622 0.65151515]
|
|
|
|
mean value: 0.44791489601729073
|
|
|
|
key: train_mcc
|
|
value: [0.47330153 0.50970622 0.46615956 0.49962218 0.50843941 0.49962218
|
|
0.51728205 0.45436947 0.44644147 0.48631932]
|
|
|
|
mean value: 0.4861263378913578
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.625 0.79166667 0.66666667 0.70833333 0.79166667
|
|
0.58333333 0.75 0.65217391 0.82608696]
|
|
|
|
mean value: 0.7186594202898551
|
|
|
|
key: train_accuracy
|
|
value: [0.73364486 0.75233645 0.72897196 0.74766355 0.75233645 0.74766355
|
|
0.75700935 0.72429907 0.72093023 0.73953488]
|
|
|
|
mean value: 0.7404390349923929
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.66666667 0.76190476 0.71428571 0.74074074 0.8
|
|
0.61538462 0.78571429 0.66666667 0.83333333]
|
|
|
|
mean value: 0.7384696784696785
|
|
|
|
key: train_fscore
|
|
value: [0.75324675 0.76855895 0.75213675 0.76315789 0.76651982 0.76315789
|
|
0.7699115 0.74458874 0.74137931 0.75862069]
|
|
|
|
mean value: 0.7581278319624325
|
|
|
|
key: test_precision
|
|
value: [0.76923077 0.6 0.88888889 0.625 0.66666667 0.76923077
|
|
0.57142857 0.6875 0.61538462 0.83333333]
|
|
|
|
mean value: 0.7026663614163614
|
|
|
|
key: train_precision
|
|
value: [0.7016129 0.72131148 0.69291339 0.71900826 0.725 0.71900826
|
|
0.73109244 0.69354839 0.69354839 0.704 ]
|
|
|
|
mean value: 0.7101043504556372
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.75 0.66666667 0.83333333 0.83333333 0.83333333
|
|
0.66666667 0.91666667 0.72727273 0.83333333]
|
|
|
|
mean value: 0.7893939393939394
|
|
|
|
key: train_recall
|
|
value: [0.81308411 0.82242991 0.82242991 0.81308411 0.81308411 0.81308411
|
|
0.81308411 0.80373832 0.7962963 0.82242991]
|
|
|
|
mean value: 0.8132744894427137
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.625 0.79166667 0.66666667 0.70833333 0.79166667
|
|
0.58333333 0.75 0.65530303 0.82575758]
|
|
|
|
mean value: 0.7189393939393939
|
|
|
|
key: train_roc_auc
|
|
value: [0.73364486 0.75233645 0.72897196 0.74766355 0.75233645 0.74766355
|
|
0.75700935 0.72429907 0.72057805 0.73991866]
|
|
|
|
mean value: 0.7404421945309796
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.5 0.61538462 0.55555556 0.58823529 0.66666667
|
|
0.44444444 0.64705882 0.5 0.71428571]
|
|
|
|
mean value: 0.5898297780650722
|
|
|
|
key: train_jcc
|
|
value: [0.60416667 0.62411348 0.60273973 0.61702128 0.62142857 0.61702128
|
|
0.62589928 0.59310345 0.5890411 0.61111111]
|
|
|
|
mean value: 0.6105645928344353
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01252055 0.01392078 0.01614141 0.01381683 0.0134027 0.01844454
|
|
0.01378655 0.01552129 0.01536298 0.01578856]
|
|
|
|
mean value: 0.014870619773864746
|
|
|
|
key: score_time
|
|
value: [0.00945973 0.01135755 0.01148272 0.01177406 0.01144099 0.01148391
|
|
0.01179481 0.01168394 0.01170611 0.01175261]
|
|
|
|
mean value: 0.01139364242553711
|
|
|
|
key: test_mcc
|
|
value: [0.43033148 0.43033148 0.64168895 0.37796447 0. 0.66666667
|
|
0.51298918 0.38490018 0.31252706 0.58002308]
|
|
|
|
mean value: 0.4337422539152132
|
|
|
|
key: train_mcc
|
|
value: [0.765559 0.66399158 0.81308411 0.56655772 0.45720843 0.74210824
|
|
0.50793174 0.66048589 0.67173227 0.66970965]
|
|
|
|
mean value: 0.6518368627712635
|
|
|
|
key: test_accuracy
|
|
value: [0.70833333 0.70833333 0.79166667 0.625 0.5 0.83333333
|
|
0.70833333 0.66666667 0.65217391 0.7826087 ]
|
|
|
|
mean value: 0.6976449275362319
|
|
|
|
key: train_accuracy
|
|
value: [0.87383178 0.81775701 0.90654206 0.74299065 0.6728972 0.85514019
|
|
0.71028037 0.80373832 0.81395349 0.80930233]
|
|
|
|
mean value: 0.8006433384046946
|
|
|
|
key: test_fscore
|
|
value: [0.74074074 0.74074074 0.82758621 0.4 0.25 0.83333333
|
|
0.58823529 0.73333333 0.55555556 0.81481481]
|
|
|
|
mean value: 0.6484340019532717
|
|
|
|
key: train_fscore
|
|
value: [0.88607595 0.84081633 0.90654206 0.65408805 0.51388889 0.87346939
|
|
0.5974026 0.8359375 0.7752809 0.83921569]
|
|
|
|
mean value: 0.7722717341484435
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.70588235 1. 0.5 0.83333333
|
|
1. 0.61111111 0.71428571 0.73333333]
|
|
|
|
mean value: 0.7431279178338002
|
|
|
|
key: train_precision
|
|
value: [0.80769231 0.74637681 0.90654206 1. 1. 0.77536232
|
|
0.9787234 0.71812081 0.98571429 0.72297297]
|
|
|
|
mean value: 0.8641504962513562
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.83333333 1. 0.25 0.16666667 0.83333333
|
|
0.41666667 0.91666667 0.45454545 0.91666667]
|
|
|
|
mean value: 0.6621212121212121
|
|
|
|
key: train_recall
|
|
value: [0.98130841 0.96261682 0.90654206 0.48598131 0.34579439 1.
|
|
0.42990654 1. 0.63888889 1. ]
|
|
|
|
mean value: 0.7751038421599169
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.70833333 0.79166667 0.625 0.5 0.83333333
|
|
0.70833333 0.66666667 0.64393939 0.77651515]
|
|
|
|
mean value: 0.6962121212121212
|
|
|
|
key: train_roc_auc
|
|
value: [0.87383178 0.81775701 0.90654206 0.74299065 0.6728972 0.85514019
|
|
0.71028037 0.80373832 0.81477155 0.81018519]
|
|
|
|
mean value: 0.8008134302526826
|
|
|
|
key: test_jcc
|
|
value: [0.58823529 0.58823529 0.70588235 0.25 0.14285714 0.71428571
|
|
0.41666667 0.57894737 0.38461538 0.6875 ]
|
|
|
|
mean value: 0.5057225218022432
|
|
|
|
key: train_jcc
|
|
value: [0.79545455 0.72535211 0.82905983 0.48598131 0.34579439 0.77536232
|
|
0.42592593 0.71812081 0.63302752 0.72297297]
|
|
|
|
mean value: 0.6457051734169397
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01544642 0.01680565 0.01688099 0.01658344 0.01468253 0.01595712
|
|
0.01408386 0.01546359 0.01462984 0.01517892]
|
|
|
|
mean value: 0.015571236610412598
|
|
|
|
key: score_time
|
|
value: [0.01171613 0.0115025 0.0115025 0.01150274 0.01146626 0.01148009
|
|
0.01147723 0.01166582 0.01146317 0.01148224]
|
|
|
|
mean value: 0.011525869369506836
|
|
|
|
key: test_mcc
|
|
value: [0.43033148 0.45834925 0.70710678 0.66666667 0.33333333 0.75261781
|
|
0.30779351 0.83333333 0.63327851 0.47727273]
|
|
|
|
mean value: 0.5600083394249354
|
|
|
|
key: train_mcc
|
|
value: [0.63278485 0.83571089 0.84292723 0.86075337 0.80587729 0.73455316
|
|
0.69721669 0.78155517 0.60343274 0.83255452]
|
|
|
|
mean value: 0.7627365913034355
|
|
|
|
key: test_accuracy
|
|
value: [0.70833333 0.70833333 0.83333333 0.83333333 0.66666667 0.875
|
|
0.625 0.91666667 0.7826087 0.73913043]
|
|
|
|
mean value: 0.768840579710145
|
|
|
|
key: train_accuracy
|
|
value: [0.78971963 0.91121495 0.92056075 0.92990654 0.89719626 0.85046729
|
|
0.8271028 0.88317757 0.76744186 0.91627907]
|
|
|
|
mean value: 0.8693066724625081
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.63157895 0.85714286 0.83333333 0.66666667 0.88
|
|
0.70967742 0.91666667 0.81481481 0.75 ]
|
|
|
|
mean value: 0.7726547372014265
|
|
|
|
key: train_fscore
|
|
value: [0.73684211 0.9025641 0.92307692 0.93150685 0.8877551 0.8699187
|
|
0.85258964 0.87046632 0.81203008 0.91588785]
|
|
|
|
mean value: 0.8702637669780106
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.85714286 0.75 0.83333333 0.66666667 0.84615385
|
|
0.57894737 0.91666667 0.6875 0.75 ]
|
|
|
|
mean value: 0.76641885161622
|
|
|
|
key: train_precision
|
|
value: [0.984375 1. 0.89473684 0.91071429 0.97752809 0.76978417
|
|
0.74305556 0.97674419 0.6835443 0.91588785]
|
|
|
|
mean value: 0.8856370286235885
|
|
|
|
key: test_recall
|
|
value: [0.58333333 0.5 1. 0.83333333 0.66666667 0.91666667
|
|
0.91666667 0.91666667 1. 0.75 ]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_recall
|
|
value: [0.58878505 0.82242991 0.95327103 0.95327103 0.81308411 1.
|
|
1. 0.78504673 1. 0.91588785]
|
|
|
|
mean value: 0.8831775700934579
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.70833333 0.83333333 0.83333333 0.66666667 0.875
|
|
0.625 0.91666667 0.79166667 0.73863636]
|
|
|
|
mean value: 0.7696969696969697
|
|
|
|
key: train_roc_auc
|
|
value: [0.78971963 0.91121495 0.92056075 0.92990654 0.89719626 0.85046729
|
|
0.8271028 0.88317757 0.76635514 0.91627726]
|
|
|
|
mean value: 0.8691978193146417
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.46153846 0.75 0.71428571 0.5 0.78571429
|
|
0.55 0.84615385 0.6875 0.6 ]
|
|
|
|
mean value: 0.6395192307692308
|
|
|
|
key: train_jcc
|
|
value: [0.58333333 0.82242991 0.85714286 0.87179487 0.79816514 0.76978417
|
|
0.74305556 0.7706422 0.6835443 0.84482759]
|
|
|
|
mean value: 0.774471992648445
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.13520432 0.11692595 0.11517644 0.1157937 0.11509657 0.11439133
|
|
0.11565018 0.11544991 0.11597919 0.11625528]
|
|
|
|
mean value: 0.11759228706359863
|
|
|
|
key: score_time
|
|
value: [0.01483297 0.01478171 0.01470661 0.01475096 0.0147438 0.01558471
|
|
0.01485181 0.01469231 0.01466393 0.0147202 ]
|
|
|
|
mean value: 0.014832901954650878
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.35355339 0.6761234 0.58536941 0.50709255 0.60246408
|
|
0.66666667 0.58536941 0.74047959 0.38932432]
|
|
|
|
mean value: 0.5691812221593513
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.66666667 0.83333333 0.79166667 0.75 0.79166667
|
|
0.83333333 0.79166667 0.86956522 0.69565217]
|
|
|
|
mean value: 0.7815217391304348
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.6 0.84615385 0.8 0.76923077 0.76190476
|
|
0.83333333 0.7826087 0.85714286 0.72 ]
|
|
|
|
mean value: 0.7752982959069915
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.75 0.78571429 0.76923077 0.71428571 0.88888889
|
|
0.83333333 0.81818182 0.9 0.69230769]
|
|
|
|
mean value: 0.797012432012432
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.91666667 0.83333333 0.83333333 0.66666667
|
|
0.83333333 0.75 0.81818182 0.75 ]
|
|
|
|
mean value: 0.7651515151515151
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.66666667 0.83333333 0.79166667 0.75 0.79166667
|
|
0.83333333 0.79166667 0.86742424 0.69318182]
|
|
|
|
mean value: 0.7810606060606061
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.42857143 0.73333333 0.66666667 0.625 0.61538462
|
|
0.71428571 0.64285714 0.75 0.5625 ]
|
|
|
|
mean value: 0.6381456043956044
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04348755 0.04252839 0.04224372 0.06066871 0.0431087 0.06443048
|
|
0.05952501 0.05365348 0.04849029 0.05835891]
|
|
|
|
mean value: 0.05164952278137207
|
|
|
|
key: score_time
|
|
value: [0.02122927 0.01823187 0.01750135 0.03620076 0.02824354 0.03714728
|
|
0.02844954 0.02909589 0.03381586 0.02994609]
|
|
|
|
mean value: 0.02798614501953125
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.35355339 0.58536941 0.53033009 0.0836242 0.35355339
|
|
0.75261781 0.3380617 0.65151515 0.47727273]
|
|
|
|
mean value: 0.47112672717727616
|
|
|
|
key: train_mcc
|
|
value: [0.98147988 0.99069747 0.95431352 0.97200507 0.99069747 1.
|
|
0.98147988 0.96329016 0.98156643 0.98156326]
|
|
|
|
mean value: 0.9797093130637349
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.66666667 0.79166667 0.75 0.54166667 0.66666667
|
|
0.875 0.66666667 0.82608696 0.73913043]
|
|
|
|
mean value: 0.7315217391304347
|
|
|
|
key: train_accuracy
|
|
value: [0.99065421 0.9953271 0.97663551 0.98598131 0.9953271 1.
|
|
0.99065421 0.98130841 0.99069767 0.99069767]
|
|
|
|
mean value: 0.98972831993045
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.6 0.8 0.7 0.52173913 0.6
|
|
0.86956522 0.63636364 0.81818182 0.75 ]
|
|
|
|
mean value: 0.7078458498023715
|
|
|
|
key: train_fscore
|
|
value: [0.99056604 0.99530516 0.97607656 0.98591549 0.99530516 1.
|
|
0.99056604 0.98095238 0.99065421 0.99056604]
|
|
|
|
mean value: 0.9895907076387572
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.75 0.76923077 0.875 0.54545455 0.75
|
|
0.90909091 0.7 0.81818182 0.75 ]
|
|
|
|
mean value: 0.768513986013986
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 0.99056604 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9990566037735849
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.83333333 0.58333333 0.5 0.5
|
|
0.83333333 0.58333333 0.81818182 0.75 ]
|
|
|
|
mean value: 0.6651515151515152
|
|
|
|
key: train_recall
|
|
value: [0.98130841 0.99065421 0.95327103 0.98130841 0.99065421 1.
|
|
0.98130841 0.96261682 0.98148148 0.98130841]
|
|
|
|
mean value: 0.9803911388023537
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.66666667 0.79166667 0.75 0.54166667 0.66666667
|
|
0.875 0.66666667 0.82575758 0.73863636]
|
|
|
|
mean value: 0.731439393939394
|
|
|
|
key: train_roc_auc
|
|
value: [0.99065421 0.9953271 0.97663551 0.98598131 0.9953271 1.
|
|
0.99065421 0.98130841 0.99074074 0.99065421]
|
|
|
|
mean value: 0.9897282796815506
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.42857143 0.66666667 0.53846154 0.35294118 0.42857143
|
|
0.76923077 0.46666667 0.69230769 0.6 ]
|
|
|
|
mean value: 0.5586274509803921
|
|
|
|
key: train_jcc
|
|
value: [0.98130841 0.99065421 0.95327103 0.97222222 0.99065421 1.
|
|
0.98130841 0.96261682 0.98148148 0.98130841]
|
|
|
|
mean value: 0.9794825199030807
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02282429 0.02679276 0.02753735 0.02729583 0.02739811 0.06211233
|
|
0.07006454 0.06247091 0.065027 0.07080889]
|
|
|
|
mean value: 0.046233201026916505
|
|
|
|
key: score_time
|
|
value: [0.01267719 0.01270986 0.02094722 0.01265335 0.01270294 0.02475357
|
|
0.0230341 0.02312326 0.02105856 0.02447557]
|
|
|
|
mean value: 0.018813562393188477
|
|
|
|
key: test_mcc
|
|
value: [0.2508726 0.3380617 0.3380617 0.58536941 0.16903085 0.58536941
|
|
0.35355339 0.43033148 0.5164589 0.31252706]
|
|
|
|
mean value: 0.3879636502859699
|
|
|
|
key: train_mcc
|
|
value: [0.99069747 0.99069747 0.99069747 0.99069747 0.99069747 0.99069747
|
|
0.98147988 0.99069747 0.99073994 0.99074074]
|
|
|
|
mean value: 0.98978428681763
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.66666667 0.66666667 0.79166667 0.58333333 0.79166667
|
|
0.66666667 0.70833333 0.73913043 0.65217391]
|
|
|
|
mean value: 0.6891304347826087
|
|
|
|
key: train_accuracy
|
|
value: [0.9953271 0.9953271 0.9953271 0.9953271 0.9953271 0.9953271
|
|
0.99065421 0.9953271 0.99534884 0.99534884]
|
|
|
|
mean value: 0.994864159965225
|
|
|
|
key: test_fscore
|
|
value: [0.64 0.63636364 0.69230769 0.8 0.61538462 0.7826087
|
|
0.71428571 0.74074074 0.76923077 0.71428571]
|
|
|
|
mean value: 0.7105207578251057
|
|
|
|
key: train_fscore
|
|
value: [0.99534884 0.99534884 0.99534884 0.99534884 0.99534884 0.99534884
|
|
0.99074074 0.99534884 0.99539171 0.99534884]
|
|
|
|
mean value: 0.9948923143484284
|
|
|
|
key: test_precision
|
|
value: [0.61538462 0.7 0.64285714 0.76923077 0.57142857 0.81818182
|
|
0.625 0.66666667 0.66666667 0.625 ]
|
|
|
|
mean value: 0.670041625041625
|
|
|
|
key: train_precision
|
|
value: [0.99074074 0.99074074 0.99074074 0.99074074 0.99074074 0.99074074
|
|
0.98165138 0.99074074 0.99082569 0.99074074]
|
|
|
|
mean value: 0.9898402990146109
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.58333333 0.75 0.83333333 0.66666667 0.75
|
|
0.83333333 0.83333333 0.90909091 0.83333333]
|
|
|
|
mean value: 0.7659090909090909
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.66666667 0.66666667 0.79166667 0.58333333 0.79166667
|
|
0.66666667 0.70833333 0.74621212 0.64393939]
|
|
|
|
mean value: 0.6890151515151515
|
|
|
|
key: train_roc_auc
|
|
value: [0.9953271 0.9953271 0.9953271 0.9953271 0.9953271 0.9953271
|
|
0.99065421 0.9953271 0.9953271 0.99537037]
|
|
|
|
mean value: 0.9948641398407753
|
|
|
|
key: test_jcc
|
|
value: [0.47058824 0.46666667 0.52941176 0.66666667 0.44444444 0.64285714
|
|
0.55555556 0.58823529 0.625 0.55555556]
|
|
|
|
mean value: 0.5544981325863679
|
|
|
|
key: train_jcc
|
|
value: [0.99074074 0.99074074 0.99074074 0.99074074 0.99074074 0.99074074
|
|
0.98165138 0.99074074 0.99082569 0.99074074]
|
|
|
|
mean value: 0.9898402990146109
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.39302421 0.38781261 0.38132334 0.38107753 0.38223696 0.38157129
|
|
0.38028026 0.38978195 0.38478231 0.38798022]
|
|
|
|
mean value: 0.38498706817626954
|
|
|
|
key: score_time
|
|
value: [0.00929594 0.00920892 0.00926375 0.00939751 0.00947452 0.00936222
|
|
0.00934148 0.00942659 0.00945687 0.00921726]
|
|
|
|
mean value: 0.00934450626373291
|
|
|
|
key: test_mcc
|
|
value: [0.66666667 0.3380617 0.60246408 0.75261781 0.83333333 0.60246408
|
|
0.58536941 0.75261781 0.66414149 0.38932432]
|
|
|
|
mean value: 0.6187060687615544
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.66666667 0.79166667 0.875 0.91666667 0.79166667
|
|
0.79166667 0.875 0.82608696 0.69565217]
|
|
|
|
mean value: 0.806340579710145
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.63636364 0.81481481 0.88 0.91666667 0.76190476
|
|
0.8 0.86956522 0.83333333 0.72 ]
|
|
|
|
mean value: 0.806598176380785
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.7 0.73333333 0.84615385 0.91666667 0.88888889
|
|
0.76923077 0.90909091 0.76923077 0.69230769]
|
|
|
|
mean value: 0.8058236208236208
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.58333333 0.91666667 0.91666667 0.91666667 0.66666667
|
|
0.83333333 0.83333333 0.90909091 0.75 ]
|
|
|
|
mean value: 0.8159090909090909
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.66666667 0.79166667 0.875 0.91666667 0.79166667
|
|
0.79166667 0.875 0.82954545 0.69318182]
|
|
|
|
mean value: 0.8064393939393939
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.46666667 0.6875 0.78571429 0.84615385 0.61538462
|
|
0.66666667 0.76923077 0.71428571 0.5625 ]
|
|
|
|
mean value: 0.6828388278388279
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02136946 0.02109194 0.02217722 0.02191544 0.02162218 0.02250242
|
|
0.0231998 0.03056288 0.02152514 0.02213955]
|
|
|
|
mean value: 0.022810602188110353
|
|
|
|
key: score_time
|
|
value: [0.01971126 0.01300335 0.01919699 0.01460743 0.01475883 0.01430082
|
|
0.01233721 0.01231742 0.01723862 0.01476789]
|
|
|
|
mean value: 0.015223979949951172
|
|
|
|
key: test_mcc
|
|
value: [0.35355339 0.5 0.41812101 0.25819889 0.3380617 0.66666667
|
|
0.43033148 0.3380617 0.3030303 0.04545455]
|
|
|
|
mean value: 0.3651479687190244
|
|
|
|
key: train_mcc
|
|
value: [1. 0.94541277 0.98147988 0.95431352 0.90197523 1.
|
|
0.97234487 1. 0.91088773 0.98156643]
|
|
|
|
mean value: 0.9647980425405398
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.75 0.70833333 0.625 0.66666667 0.83333333
|
|
0.70833333 0.66666667 0.65217391 0.52173913]
|
|
|
|
mean value: 0.6798913043478261
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.97196262 0.99065421 0.97663551 0.94859813 1.
|
|
0.98598131 1. 0.95348837 0.99069767]
|
|
|
|
mean value: 0.9818017822212562
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.75 0.69565217 0.66666667 0.63636364 0.83333333
|
|
0.74074074 0.69230769 0.63636364 0.52173913]
|
|
|
|
mean value: 0.6887452724409246
|
|
|
|
key: train_fscore
|
|
value: [1. 0.97272727 0.99074074 0.97716895 0.95111111 1.
|
|
0.98617512 1. 0.95575221 0.99074074]
|
|
|
|
mean value: 0.9824416142688309
|
|
|
|
key: test_precision
|
|
value: [0.625 0.75 0.72727273 0.6 0.7 0.83333333
|
|
0.66666667 0.64285714 0.63636364 0.54545455]
|
|
|
|
mean value: 0.6726948051948052
|
|
|
|
key: train_precision
|
|
value: [1. 0.94690265 0.98165138 0.95535714 0.90677966 1.
|
|
0.97272727 1. 0.91525424 0.98165138]
|
|
|
|
mean value: 0.9660323721050335
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.75 0.66666667 0.75 0.58333333 0.83333333
|
|
0.83333333 0.75 0.63636364 0.5 ]
|
|
|
|
mean value: 0.7136363636363636
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.75 0.70833333 0.625 0.66666667 0.83333333
|
|
0.70833333 0.66666667 0.65151515 0.52272727]
|
|
|
|
mean value: 0.6799242424242424
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.97196262 0.99065421 0.97663551 0.94859813 1.
|
|
0.98598131 1. 0.95327103 0.99074074]
|
|
|
|
mean value: 0.9817843544479058
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.6 0.53333333 0.5 0.46666667 0.71428571
|
|
0.58823529 0.52941176 0.46666667 0.35294118]
|
|
|
|
mean value: 0.5307096171802054
|
|
|
|
key: train_jcc
|
|
value: [1. 0.94690265 0.98165138 0.95535714 0.90677966 1.
|
|
0.97272727 1. 0.91525424 0.98165138]
|
|
|
|
mean value: 0.9660323721050335
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02252746 0.03434563 0.03585696 0.03267336 0.03483081 0.03434944
|
|
0.03433037 0.02833104 0.03430033 0.03429699]
|
|
|
|
mean value: 0.03258423805236817
|
|
|
|
key: score_time
|
|
value: [0.01528716 0.02240324 0.02809668 0.02184415 0.0218873 0.02381444
|
|
0.02386975 0.02300072 0.020684 0.02241898]
|
|
|
|
mean value: 0.022330641746520996
|
|
|
|
key: test_mcc
|
|
value: [0.33333333 0.6761234 0.41812101 0.58536941 0.35355339 0.50709255
|
|
0.50709255 0.66666667 0.74242424 0.39393939]
|
|
|
|
mean value: 0.5183715948422453
|
|
|
|
key: train_mcc
|
|
value: [0.87052859 0.89723545 0.87885017 0.8884715 0.88785047 0.88785047
|
|
0.87052859 0.88039066 0.84360068 0.86233346]
|
|
|
|
mean value: 0.876764004971511
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.83333333 0.70833333 0.79166667 0.66666667 0.75
|
|
0.75 0.83333333 0.86956522 0.69565217]
|
|
|
|
mean value: 0.7565217391304347
|
|
|
|
key: train_accuracy
|
|
value: [0.93457944 0.94859813 0.93925234 0.94392523 0.94392523 0.94392523
|
|
0.93457944 0.93925234 0.92093023 0.93023256]
|
|
|
|
mean value: 0.9379200173875245
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.81818182 0.72 0.8 0.71428571 0.72727273
|
|
0.76923077 0.83333333 0.86956522 0.69565217]
|
|
|
|
mean value: 0.7614188420275376
|
|
|
|
key: train_fscore
|
|
value: [0.93636364 0.94883721 0.94009217 0.94495413 0.94392523 0.94392523
|
|
0.93636364 0.94117647 0.92376682 0.9321267 ]
|
|
|
|
mean value: 0.9391531227222615
|
|
|
|
key: test_precision
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_rt.py:175: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_rt.py:178: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.66666667 0.9 0.69230769 0.76923077 0.625 0.8
|
|
0.71428571 0.83333333 0.83333333 0.72727273]
|
|
|
|
mean value: 0.7561430236430237
|
|
|
|
key: train_precision
|
|
value: [0.91150442 0.94444444 0.92727273 0.92792793 0.94392523 0.94392523
|
|
0.91150442 0.9122807 0.89565217 0.90350877]
|
|
|
|
mean value: 0.9221946064089596
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.75 0.75 0.83333333 0.83333333 0.66666667
|
|
0.83333333 0.83333333 0.90909091 0.66666667]
|
|
|
|
mean value: 0.7742424242424243
|
|
|
|
key: train_recall
|
|
value: [0.96261682 0.95327103 0.95327103 0.96261682 0.94392523 0.94392523
|
|
0.96261682 0.97196262 0.9537037 0.96261682]
|
|
|
|
mean value: 0.9570526133610245
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.83333333 0.70833333 0.79166667 0.66666667 0.75
|
|
0.75 0.83333333 0.87121212 0.6969697 ]
|
|
|
|
mean value: 0.7568181818181818
|
|
|
|
key: train_roc_auc
|
|
value: [0.93457944 0.94859813 0.93925234 0.94392523 0.94392523 0.94392523
|
|
0.93457944 0.93925234 0.92077709 0.93038249]
|
|
|
|
mean value: 0.9379196953963309
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.69230769 0.5625 0.66666667 0.55555556 0.57142857
|
|
0.625 0.71428571 0.76923077 0.53333333]
|
|
|
|
mean value: 0.6190308302808303
|
|
|
|
key: train_jcc
|
|
value: [0.88034188 0.90265487 0.88695652 0.89565217 0.89380531 0.89380531
|
|
0.88034188 0.88888889 0.85833333 0.87288136]
|
|
|
|
mean value: 0.8853661521216024
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.22179174 0.28168225 0.26826978 0.22097516 0.23485756 0.22735858
|
|
0.23563886 0.23156381 0.22941089 0.23866081]
|
|
|
|
mean value: 0.2390209436416626
|
|
|
|
key: score_time
|
|
value: [0.0200696 0.02238464 0.02189493 0.02158761 0.02371407 0.02026796
|
|
0.02035952 0.020437 0.02219439 0.02334285]
|
|
|
|
mean value: 0.021625256538391112
|
|
|
|
key: test_mcc
|
|
value: [0.33333333 0.66666667 0.6761234 0.75261781 0.60246408 0.43033148
|
|
0.53033009 0.60246408 0.74242424 0.58002308]
|
|
|
|
mean value: 0.5916778251293104
|
|
|
|
key: train_mcc
|
|
value: [0.77207467 0.76181538 0.76033717 0.75032247 0.77692337 0.76908054
|
|
0.77043718 0.74300512 0.77789466 0.78889274]
|
|
|
|
mean value: 0.7670783288690242
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.83333333 0.83333333 0.875 0.79166667 0.70833333
|
|
0.75 0.79166667 0.86956522 0.7826087 ]
|
|
|
|
mean value: 0.7902173913043479
|
|
|
|
key: train_accuracy
|
|
value: [0.88317757 0.87850467 0.87850467 0.87383178 0.88785047 0.88317757
|
|
0.88317757 0.86915888 0.88837209 0.89302326]
|
|
|
|
mean value: 0.8818778526407303
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.83333333 0.84615385 0.86956522 0.81481481 0.66666667
|
|
0.78571429 0.81481481 0.86956522 0.81481481]
|
|
|
|
mean value: 0.7982109677761852
|
|
|
|
key: train_fscore
|
|
value: [0.88986784 0.88495575 0.88392857 0.87892377 0.89090909 0.88789238
|
|
0.88888889 0.87610619 0.89189189 0.89686099]
|
|
|
|
mean value: 0.8870225361475632
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.83333333 0.78571429 0.90909091 0.73333333 0.77777778
|
|
0.6875 0.73333333 0.83333333 0.73333333]
|
|
|
|
mean value: 0.7693416305916305
|
|
|
|
key: train_precision
|
|
value: [0.84166667 0.84033613 0.84615385 0.84482759 0.86725664 0.85344828
|
|
0.84745763 0.83193277 0.86842105 0.86206897]
|
|
|
|
mean value: 0.8503569564888109
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.83333333 0.91666667 0.83333333 0.91666667 0.58333333
|
|
0.91666667 0.91666667 0.90909091 0.91666667]
|
|
|
|
mean value: 0.8409090909090909
|
|
|
|
key: train_recall
|
|
value: [0.94392523 0.93457944 0.92523364 0.91588785 0.91588785 0.92523364
|
|
0.93457944 0.92523364 0.91666667 0.93457944]
|
|
|
|
mean value: 0.9271806853582555
|
|
|
|
key: test_roc_auc
|
|
value: [0.66666667 0.83333333 0.83333333 0.875 0.79166667 0.70833333
|
|
0.75 0.79166667 0.87121212 0.77651515]
|
|
|
|
mean value: 0.7897727272727273
|
|
|
|
key: train_roc_auc
|
|
value: [0.88317757 0.87850467 0.87850467 0.87383178 0.88785047 0.88317757
|
|
0.88317757 0.86915888 0.88823988 0.89321565]
|
|
|
|
mean value: 0.8818838698511595
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.71428571 0.73333333 0.76923077 0.6875 0.5
|
|
0.64705882 0.6875 0.76923077 0.6875 ]
|
|
|
|
mean value: 0.6695639409609998
|
|
|
|
key: train_jcc
|
|
value: [0.8015873 0.79365079 0.792 0.784 0.80327869 0.7983871
|
|
0.8 0.77952756 0.80487805 0.81300813]
|
|
|
|
mean value: 0.7970317618453786
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.030931 0.0320158 0.02927685 0.03117085 0.03132129 0.02582955
|
|
0.0295136 0.03285122 0.03214002 0.02977347]
|
|
|
|
mean value: 0.0304823637008667
|
|
|
|
key: score_time
|
|
value: [0.01595521 0.01361537 0.01179814 0.01176691 0.01180816 0.0117135
|
|
0.01171279 0.01357198 0.01349711 0.01170993]
|
|
|
|
mean value: 0.012714910507202148
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.60246408 0.70710678 0.53033009 0.70710678 0.2508726
|
|
0.77459667 0.75261781 0.75261781 0.5 ]
|
|
|
|
mean value: 0.6163082021601053
|
|
|
|
key: train_mcc
|
|
value: [0.74278135 0.81607516 0.78869542 0.81537425 0.77992042 0.80725296
|
|
0.82495863 0.81607516 0.81537425 0.79848995]
|
|
|
|
mean value: 0.8004997563631071
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.79166667 0.83333333 0.75 0.83333333 0.625
|
|
0.875 0.875 0.875 0.75 ]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_accuracy
|
|
value: [0.87037037 0.90740741 0.89351852 0.90740741 0.88888889 0.90277778
|
|
0.91203704 0.90740741 0.90740741 0.89814815]
|
|
|
|
mean value: 0.899537037037037
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.76190476 0.85714286 0.78571429 0.85714286 0.64
|
|
0.85714286 0.86956522 0.88 0.75 ]
|
|
|
|
mean value: 0.8041221532091097
|
|
|
|
key: train_fscore
|
|
value: [0.875 0.90990991 0.89686099 0.90909091 0.89285714 0.9058296
|
|
0.91402715 0.90990991 0.90909091 0.90178571]
|
|
|
|
mean value: 0.9024362227425403
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.88888889 0.75 0.6875 0.75 0.61538462
|
|
1. 0.90909091 0.84615385 0.75 ]
|
|
|
|
mean value: 0.8015200077700078
|
|
|
|
key: train_precision
|
|
value: [0.84482759 0.88596491 0.86956522 0.89285714 0.86206897 0.87826087
|
|
0.89380531 0.88596491 0.89285714 0.87068966]
|
|
|
|
mean value: 0.8776861713863275
|
|
|
|
key: test_recall
|
|
value: [0.75 0.66666667 1. 0.91666667 1. 0.66666667
|
|
0.75 0.83333333 0.91666667 0.75 ]
|
|
|
|
mean value: 0.825
|
|
|
|
key: train_recall
|
|
value: [0.90740741 0.93518519 0.92592593 0.92592593 0.92592593 0.93518519
|
|
0.93518519 0.93518519 0.92592593 0.93518519]
|
|
|
|
mean value: 0.9287037037037037
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.79166667 0.83333333 0.75 0.83333333 0.625
|
|
0.875 0.875 0.875 0.75 ]
|
|
|
|
mean value: 0.7999999999999999
|
|
|
|
key: train_roc_auc
|
|
value: [0.87037037 0.90740741 0.89351852 0.90740741 0.88888889 0.90277778
|
|
0.91203704 0.90740741 0.90740741 0.89814815]
|
|
|
|
mean value: 0.899537037037037
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.61538462 0.75 0.64705882 0.75 0.47058824
|
|
0.75 0.76923077 0.78571429 0.6 ]
|
|
|
|
mean value: 0.6780833872010342
|
|
|
|
key: train_jcc
|
|
value: [0.77777778 0.83471074 0.81300813 0.83333333 0.80645161 0.82786885
|
|
0.84166667 0.83471074 0.83333333 0.82113821]
|
|
|
|
mean value: 0.8223999405540073
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.73983574 0.90101838 0.73272038 0.73073149 0.88028049 0.73427486
|
|
0.73257947 0.83858824 0.72452283 0.72177792]
|
|
|
|
mean value: 0.7736329793930053
|
|
|
|
key: score_time
|
|
value: [0.01196504 0.01196766 0.01193738 0.012146 0.01205468 0.01202273
|
|
0.01198864 0.01195478 0.01201153 0.01198506]
|
|
|
|
mean value: 0.012003350257873534
|
|
|
|
key: test_mcc
|
|
value: [0.66666667 0.75261781 0.70710678 0.53033009 0.70710678 0.25819889
|
|
0.6761234 0.66666667 0.75261781 0.6761234 ]
|
|
|
|
mean value: 0.639355829692189
|
|
|
|
key: train_mcc
|
|
value: [0.74535599 0.74393663 0.76253505 0.78788184 0.72421182 0.73403465
|
|
0.85243671 0.71818485 0.79684302 0.73403465]
|
|
|
|
mean value: 0.7599455220153282
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.875 0.83333333 0.75 0.83333333 0.625
|
|
0.83333333 0.83333333 0.875 0.83333333]
|
|
|
|
mean value: 0.8125
|
|
|
|
key: train_accuracy
|
|
value: [0.87037037 0.87037037 0.87962963 0.89351852 0.86111111 0.86574074
|
|
0.92592593 0.85648148 0.89814815 0.86574074]
|
|
|
|
mean value: 0.8787037037037037
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.86956522 0.85714286 0.78571429 0.85714286 0.66666667
|
|
0.81818182 0.83333333 0.88 0.84615385]
|
|
|
|
mean value: 0.8247234215060302
|
|
|
|
key: train_fscore
|
|
value: [0.87719298 0.87610619 0.88495575 0.8959276 0.86607143 0.87111111
|
|
0.92727273 0.86462882 0.9 0.87111111]
|
|
|
|
mean value: 0.8834377730195826
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.90909091 0.75 0.6875 0.75 0.6
|
|
0.9 0.83333333 0.84615385 0.78571429]
|
|
|
|
mean value: 0.7895125707625708
|
|
|
|
key: train_precision
|
|
value: [0.83333333 0.83898305 0.84745763 0.87610619 0.8362069 0.83760684
|
|
0.91071429 0.81818182 0.88392857 0.83760684]
|
|
|
|
mean value: 0.8520125453079775
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.83333333 1. 0.91666667 1. 0.75
|
|
0.75 0.83333333 0.91666667 0.91666667]
|
|
|
|
mean value: 0.875
|
|
|
|
key: train_recall
|
|
value: [0.92592593 0.91666667 0.92592593 0.91666667 0.89814815 0.90740741
|
|
0.94444444 0.91666667 0.91666667 0.90740741]
|
|
|
|
mean value: 0.9175925925925926
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.875 0.83333333 0.75 0.83333333 0.625
|
|
0.83333333 0.83333333 0.875 0.83333333]
|
|
|
|
mean value: 0.8125
|
|
|
|
key: train_roc_auc
|
|
value: [0.87037037 0.87037037 0.87962963 0.89351852 0.86111111 0.86574074
|
|
0.92592593 0.85648148 0.89814815 0.86574074]
|
|
|
|
mean value: 0.8787037037037037
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.76923077 0.75 0.64705882 0.75 0.5
|
|
0.69230769 0.71428571 0.78571429 0.73333333]
|
|
|
|
mean value: 0.7056216332686921
|
|
|
|
key: train_jcc
|
|
value: [0.78125 0.77952756 0.79365079 0.81147541 0.76377953 0.77165354
|
|
0.86440678 0.76153846 0.81818182 0.77165354]
|
|
|
|
mean value: 0.7917117436096502
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01284027 0.01234365 0.00941205 0.00910616 0.00889063 0.009027
|
|
0.00890779 0.00902915 0.00880742 0.00880718]
|
|
|
|
mean value: 0.009717130661010742
|
|
|
|
key: score_time
|
|
value: [0.01170111 0.00989246 0.00911975 0.00862837 0.00865746 0.00871444
|
|
0.00870013 0.00863981 0.00864148 0.00862479]
|
|
|
|
mean value: 0.009131979942321778
|
|
|
|
key: test_mcc
|
|
value: [0.53033009 0.43033148 0.30779351 0.51298918 0.51298918 0.3380617
|
|
0.43033148 0.51298918 0.53033009 0.60246408]
|
|
|
|
mean value: 0.47086099493250855
|
|
|
|
key: train_mcc
|
|
value: [0.49840764 0.50566876 0.53033009 0.53831098 0.5267854 0.51043405
|
|
0.53045108 0.49458912 0.49840764 0.48770901]
|
|
|
|
mean value: 0.5121093761721703
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.70833333 0.625 0.70833333 0.70833333 0.66666667
|
|
0.70833333 0.70833333 0.75 0.79166667]
|
|
|
|
mean value: 0.7125
|
|
|
|
key: train_accuracy
|
|
value: [0.73148148 0.71759259 0.75 0.75462963 0.75 0.73611111
|
|
0.74537037 0.73148148 0.73148148 0.73611111]
|
|
|
|
mean value: 0.7384259259259259
|
|
|
|
key: test_fscore
|
|
value: [0.78571429 0.74074074 0.70967742 0.77419355 0.77419355 0.69230769
|
|
0.74074074 0.77419355 0.78571429 0.81481481]
|
|
|
|
mean value: 0.7592290624548689
|
|
|
|
key: train_fscore
|
|
value: [0.7734375 0.77490775 0.78571429 0.78884462 0.784 0.77821012
|
|
0.78599222 0.77165354 0.7734375 0.7654321 ]
|
|
|
|
mean value: 0.778162963300859
|
|
|
|
key: test_precision
|
|
value: [0.6875 0.66666667 0.57894737 0.63157895 0.63157895 0.64285714
|
|
0.66666667 0.63157895 0.6875 0.73333333]
|
|
|
|
mean value: 0.6558208020050125
|
|
|
|
key: train_precision
|
|
value: [0.66891892 0.64417178 0.6875 0.69230769 0.69014085 0.67114094
|
|
0.67785235 0.67123288 0.66891892 0.68888889]
|
|
|
|
mean value: 0.6761073208548879
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.83333333 0.91666667 1. 1. 0.75
|
|
0.83333333 1. 0.91666667 0.91666667]
|
|
|
|
mean value: 0.9083333333333333
|
|
|
|
key: train_recall
|
|
value: [0.91666667 0.97222222 0.91666667 0.91666667 0.90740741 0.92592593
|
|
0.93518519 0.90740741 0.91666667 0.86111111]
|
|
|
|
mean value: 0.9175925925925926
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.70833333 0.625 0.70833333 0.70833333 0.66666667
|
|
0.70833333 0.70833333 0.75 0.79166667]
|
|
|
|
mean value: 0.7124999999999999
|
|
|
|
key: train_roc_auc
|
|
value: [0.73148148 0.71759259 0.75 0.75462963 0.75 0.73611111
|
|
0.74537037 0.73148148 0.73148148 0.73611111]
|
|
|
|
mean value: 0.7384259259259259
|
|
|
|
key: test_jcc
|
|
value: [0.64705882 0.58823529 0.55 0.63157895 0.63157895 0.52941176
|
|
0.58823529 0.63157895 0.64705882 0.6875 ]
|
|
|
|
mean value: 0.6132236842105263
|
|
|
|
key: train_jcc
|
|
value: [0.63057325 0.63253012 0.64705882 0.65131579 0.64473684 0.63694268
|
|
0.6474359 0.62820513 0.63057325 0.62 ]
|
|
|
|
mean value: 0.6369371773205834
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00939465 0.00916624 0.0092895 0.00931859 0.00914979 0.00933838
|
|
0.0092392 0.0092268 0.00913119 0.00918865]
|
|
|
|
mean value: 0.009244298934936524
|
|
|
|
key: score_time
|
|
value: [0.00876069 0.00885034 0.00866103 0.00876975 0.0086937 0.00873065
|
|
0.0087502 0.00870895 0.00870633 0.00871468]
|
|
|
|
mean value: 0.008734631538391113
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.43033148 0.58536941 0.57735027 0.2508726 0.3380617
|
|
0.58536941 0.5 0.60246408 0.6761234 ]
|
|
|
|
mean value: 0.5131311757869508
|
|
|
|
key: train_mcc
|
|
value: [0.56542109 0.63957467 0.64111887 0.62103628 0.65366344 0.64023511
|
|
0.58638277 0.63355259 0.60187765 0.62361342]
|
|
|
|
mean value: 0.6206475899027868
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.70833333 0.79166667 0.75 0.625 0.66666667
|
|
0.79166667 0.75 0.79166667 0.83333333]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_accuracy
|
|
value: [0.78240741 0.81944444 0.81944444 0.81018519 0.82407407 0.81944444
|
|
0.79166667 0.81481481 0.80092593 0.81018519]
|
|
|
|
mean value: 0.8092592592592592
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.66666667 0.8 0.8 0.64 0.69230769
|
|
0.7826087 0.75 0.81481481 0.84615385]
|
|
|
|
mean value: 0.7575160411247368
|
|
|
|
key: train_fscore
|
|
value: [0.78733032 0.82352941 0.82666667 0.81447964 0.83478261 0.82511211
|
|
0.80176211 0.8245614 0.8 0.81938326]
|
|
|
|
mean value: 0.8157607527459586
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.77777778 0.76923077 0.66666667 0.61538462 0.64285714
|
|
0.81818182 0.75 0.73333333 0.78571429]
|
|
|
|
mean value: 0.7377328227328227
|
|
|
|
key: train_precision
|
|
value: [0.7699115 0.80530973 0.79487179 0.79646018 0.78688525 0.8
|
|
0.76470588 0.78333333 0.80373832 0.78151261]
|
|
|
|
mean value: 0.7886728595187938
|
|
|
|
key: test_recall
|
|
value: [0.75 0.58333333 0.83333333 1. 0.66666667 0.75
|
|
0.75 0.75 0.91666667 0.91666667]
|
|
|
|
mean value: 0.7916666666666666
|
|
|
|
key: train_recall
|
|
value: [0.80555556 0.84259259 0.86111111 0.83333333 0.88888889 0.85185185
|
|
0.84259259 0.87037037 0.7962963 0.86111111]
|
|
|
|
mean value: 0.8453703703703703
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.70833333 0.79166667 0.75 0.625 0.66666667
|
|
0.79166667 0.75 0.79166667 0.83333333]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_roc_auc
|
|
value: [0.78240741 0.81944444 0.81944444 0.81018519 0.82407407 0.81944444
|
|
0.79166667 0.81481481 0.80092593 0.81018519]
|
|
|
|
mean value: 0.8092592592592592
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.5 0.66666667 0.66666667 0.47058824 0.52941176
|
|
0.64285714 0.6 0.6875 0.73333333]
|
|
|
|
mean value: 0.6139880952380953
|
|
|
|
key: train_jcc
|
|
value: [0.64925373 0.7 0.70454545 0.6870229 0.71641791 0.70229008
|
|
0.66911765 0.70149254 0.66666667 0.69402985]
|
|
|
|
mean value: 0.6890836775220928
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01033831 0.00852084 0.00854516 0.00849104 0.00850368 0.00855517
|
|
0.00873184 0.00868368 0.00860906 0.00962782]
|
|
|
|
mean value: 0.0088606595993042
|
|
|
|
key: score_time
|
|
value: [0.01552606 0.01441646 0.01456785 0.01397634 0.01450419 0.01429963
|
|
0.01512551 0.01440215 0.01423144 0.00980425]
|
|
|
|
mean value: 0.01408538818359375
|
|
|
|
key: test_mcc
|
|
value: [0.75261781 0.41812101 0.33333333 0.09166985 0.50709255 0.27500955
|
|
0.16903085 0.5 0.66666667 0.2508726 ]
|
|
|
|
mean value: 0.3964414219606152
|
|
|
|
key: train_mcc
|
|
value: [0.58760578 0.61491869 0.61138741 0.64825931 0.59763515 0.62361342
|
|
0.6094494 0.60395256 0.63355259 0.61631125]
|
|
|
|
mean value: 0.614668557893251
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.70833333 0.66666667 0.54166667 0.75 0.625
|
|
0.58333333 0.75 0.83333333 0.625 ]
|
|
|
|
mean value: 0.6958333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.79166667 0.80555556 0.80092593 0.82407407 0.7962963 0.81018519
|
|
0.80092593 0.80092593 0.81481481 0.80555556]
|
|
|
|
mean value: 0.8050925925925926
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.72 0.66666667 0.62068966 0.76923077 0.68965517
|
|
0.61538462 0.75 0.83333333 0.64 ]
|
|
|
|
mean value: 0.7174525429592896
|
|
|
|
key: train_fscore
|
|
value: [0.80349345 0.81578947 0.81702128 0.82568807 0.80869565 0.81938326
|
|
0.81545064 0.80888889 0.8245614 0.8173913 ]
|
|
|
|
mean value: 0.8156363426064228
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.69230769 0.66666667 0.52941176 0.71428571 0.58823529
|
|
0.57142857 0.75 0.83333333 0.61538462]
|
|
|
|
mean value: 0.6870144561321032
|
|
|
|
key: train_precision
|
|
value: [0.76033058 0.775 0.75590551 0.81818182 0.76229508 0.78151261
|
|
0.76 0.77777778 0.78333333 0.7704918 ]
|
|
|
|
mean value: 0.7744828509904268
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.75 0.66666667 0.75 0.83333333 0.83333333
|
|
0.66666667 0.75 0.83333333 0.66666667]
|
|
|
|
mean value: 0.7583333333333333
|
|
|
|
key: train_recall
|
|
value: [0.85185185 0.86111111 0.88888889 0.83333333 0.86111111 0.86111111
|
|
0.87962963 0.84259259 0.87037037 0.87037037]
|
|
|
|
mean value: 0.862037037037037
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.70833333 0.66666667 0.54166667 0.75 0.625
|
|
0.58333333 0.75 0.83333333 0.625 ]
|
|
|
|
mean value: 0.6958333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.79166667 0.80555556 0.80092593 0.82407407 0.7962963 0.81018519
|
|
0.80092593 0.80092593 0.81481481 0.80555556]
|
|
|
|
mean value: 0.8050925925925926
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.5625 0.5 0.45 0.625 0.52631579
|
|
0.44444444 0.6 0.71428571 0.47058824]
|
|
|
|
mean value: 0.566236495272873
|
|
|
|
key: train_jcc
|
|
value: [0.67153285 0.68888889 0.69064748 0.703125 0.67883212 0.69402985
|
|
0.6884058 0.67910448 0.70149254 0.69117647]
|
|
|
|
mean value: 0.6887235467768253
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01374173 0.01213717 0.01173711 0.01166201 0.01164699 0.01172185
|
|
0.01182199 0.01179361 0.01172495 0.01177144]
|
|
|
|
mean value: 0.011975884437561035
|
|
|
|
key: score_time
|
|
value: [0.01068091 0.00942492 0.00945401 0.00938773 0.00937343 0.0093987
|
|
0.00945997 0.00949836 0.00942206 0.00950742]
|
|
|
|
mean value: 0.009560751914978027
|
|
|
|
key: test_mcc
|
|
value: [0.83333333 0.75261781 0.60246408 0.51298918 0.50709255 0.35355339
|
|
0.6761234 0.6761234 0.66666667 0.6761234 ]
|
|
|
|
mean value: 0.6257087215904491
|
|
|
|
key: train_mcc
|
|
value: [0.77822 0.81145561 0.78262379 0.81705949 0.8183303 0.79280145
|
|
0.74571454 0.78262379 0.78978412 0.79115136]
|
|
|
|
mean value: 0.7909764449231979
|
|
|
|
key: test_accuracy
|
|
value: [0.91666667 0.875 0.79166667 0.70833333 0.75 0.66666667
|
|
0.83333333 0.83333333 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8041666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.88425926 0.90277778 0.88888889 0.90740741 0.90740741 0.89351852
|
|
0.86574074 0.88888889 0.89351852 0.89351852]
|
|
|
|
mean value: 0.8925925925925926
|
|
|
|
key: test_fscore
|
|
value: [0.91666667 0.86956522 0.81481481 0.77419355 0.76923077 0.71428571
|
|
0.84615385 0.81818182 0.83333333 0.84615385]
|
|
|
|
mean value: 0.820257957459921
|
|
|
|
key: train_fscore
|
|
value: [0.89270386 0.90829694 0.89473684 0.91071429 0.91150442 0.89956332
|
|
0.87763713 0.89473684 0.89777778 0.89867841]
|
|
|
|
mean value: 0.8986349842049632
|
|
|
|
key: test_precision
|
|
value: [0.91666667 0.90909091 0.73333333 0.63157895 0.71428571 0.625
|
|
0.78571429 0.9 0.83333333 0.78571429]
|
|
|
|
mean value: 0.7834717475506949
|
|
|
|
key: train_precision
|
|
value: [0.832 0.85950413 0.85 0.87931034 0.87288136 0.85123967
|
|
0.80620155 0.85 0.86324786 0.85714286]
|
|
|
|
mean value: 0.8521527773191
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.83333333 0.91666667 1. 0.83333333 0.83333333
|
|
0.91666667 0.75 0.83333333 0.91666667]
|
|
|
|
mean value: 0.875
|
|
|
|
key: train_recall
|
|
value: [0.96296296 0.96296296 0.94444444 0.94444444 0.9537037 0.9537037
|
|
0.96296296 0.94444444 0.93518519 0.94444444]
|
|
|
|
mean value: 0.950925925925926
|
|
|
|
key: test_roc_auc
|
|
value: [0.91666667 0.875 0.79166667 0.70833333 0.75 0.66666667
|
|
0.83333333 0.83333333 0.83333333 0.83333333]
|
|
|
|
mean value: 0.8041666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.88425926 0.90277778 0.88888889 0.90740741 0.90740741 0.89351852
|
|
0.86574074 0.88888889 0.89351852 0.89351852]
|
|
|
|
mean value: 0.8925925925925926
|
|
|
|
key: test_jcc
|
|
value: [0.84615385 0.76923077 0.6875 0.63157895 0.625 0.55555556
|
|
0.73333333 0.69230769 0.71428571 0.73333333]
|
|
|
|
mean value: 0.6988279191568665
|
|
|
|
key: train_jcc
|
|
value: [0.80620155 0.832 0.80952381 0.83606557 0.83739837 0.81746032
|
|
0.78195489 0.80952381 0.81451613 0.816 ]
|
|
|
|
mean value: 0.8160644450900069
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.1305542 1.02247667 1.16441059 1.02352166 1.15711308 0.49866986
|
|
1.0611999 0.99131846 0.8503859 1.16561484]
|
|
|
|
mean value: 1.006526517868042
|
|
|
|
key: score_time
|
|
value: [0.01458883 0.01414919 0.01413441 0.01450205 0.01472497 0.01218414
|
|
0.01464009 0.01218653 0.0121913 0.02671266]
|
|
|
|
mean value: 0.015001416206359863
|
|
|
|
key: test_mcc
|
|
value: [0.60246408 0.45834925 0.53033009 0.35355339 0.50709255 0.60246408
|
|
0.6761234 0.66666667 0.6761234 0.50709255]
|
|
|
|
mean value: 0.5580259457057222
|
|
|
|
key: train_mcc
|
|
value: [0.94444444 0.96312812 0.95374459 0.95407186 0.98164982 0.82174833
|
|
0.93522528 0.90284331 0.90004066 0.95374459]
|
|
|
|
mean value: 0.9310640995379719
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.70833333 0.75 0.66666667 0.75 0.79166667
|
|
0.83333333 0.83333333 0.83333333 0.75 ]
|
|
|
|
mean value: 0.7708333333333334
|
|
|
|
key: train_accuracy
|
|
value: [0.97222222 0.98148148 0.97685185 0.97685185 0.99074074 0.90740741
|
|
0.96759259 0.94907407 0.94907407 0.97685185]
|
|
|
|
mean value: 0.9648148148148148
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.63157895 0.78571429 0.71428571 0.76923077 0.76190476
|
|
0.81818182 0.83333333 0.84615385 0.72727273]
|
|
|
|
mean value: 0.7649560965350439
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[0.97222222 0.98165138 0.97695853 0.97716895 0.99065421 0.9009901
|
|
0.96744186 0.95154185 0.95067265 0.97674419]
|
|
|
|
mean value: 0.9646045920575503
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.85714286 0.6875 0.625 0.71428571 0.88888889
|
|
0.9 0.83333333 0.78571429 0.8 ]
|
|
|
|
mean value: 0.7980753968253969
|
|
|
|
key: train_precision
|
|
value: [0.97222222 0.97272727 0.97247706 0.96396396 1. 0.96808511
|
|
0.97196262 0.90756303 0.92173913 0.98130841]
|
|
|
|
mean value: 0.9632048813198871
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.5 0.91666667 0.83333333 0.83333333 0.66666667
|
|
0.75 0.83333333 0.91666667 0.66666667]
|
|
|
|
mean value: 0.7583333333333333
|
|
|
|
key: train_recall
|
|
value: [0.97222222 0.99074074 0.98148148 0.99074074 0.98148148 0.84259259
|
|
0.96296296 1. 0.98148148 0.97222222]
|
|
|
|
mean value: 0.9675925925925926
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.70833333 0.75 0.66666667 0.75 0.79166667
|
|
0.83333333 0.83333333 0.83333333 0.75 ]
|
|
|
|
mean value: 0.7708333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.97222222 0.98148148 0.97685185 0.97685185 0.99074074 0.90740741
|
|
0.96759259 0.94907407 0.94907407 0.97685185]
|
|
|
|
mean value: 0.9648148148148148
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.46153846 0.64705882 0.55555556 0.625 0.61538462
|
|
0.69230769 0.71428571 0.73333333 0.57142857]
|
|
|
|
mean value: 0.6231277382747971
|
|
|
|
key: train_jcc
|
|
value: [0.94594595 0.96396396 0.95495495 0.95535714 0.98148148 0.81981982
|
|
0.93693694 0.90756303 0.90598291 0.95454545]
|
|
|
|
mean value: 0.9326551631698691
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02105474 0.0174222 0.0158999 0.01779056 0.01491666 0.01658893
|
|
0.01581645 0.01523876 0.0158267 0.01972294]
|
|
|
|
mean value: 0.017027783393859863
|
|
|
|
key: score_time
|
|
value: [0.0117085 0.00898194 0.00898838 0.00868154 0.00867081 0.0085578
|
|
0.00858188 0.0085516 0.0085907 0.00888038]
|
|
|
|
mean value: 0.0090193510055542
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.84515425 0.77459667 0.43033148 0.0860663 0.43033148
|
|
0.41812101 0.77459667 0.66666667 0.25819889]
|
|
|
|
mean value: 0.5269432824040078
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.91666667 0.875 0.70833333 0.54166667 0.70833333
|
|
0.70833333 0.875 0.83333333 0.625 ]
|
|
|
|
mean value: 0.7583333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.90909091 0.88888889 0.66666667 0.59259259 0.74074074
|
|
0.72 0.85714286 0.83333333 0.66666667]
|
|
|
|
mean value: 0.7657731350774829
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.81818182 1. 0.8 0.77777778 0.53333333 0.66666667
|
|
0.69230769 1. 0.83333333 0.6 ]
|
|
|
|
mean value: 0.7721600621600622
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.83333333 1. 0.58333333 0.66666667 0.83333333
|
|
0.75 0.75 0.83333333 0.75 ]
|
|
|
|
mean value: 0.775
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.91666667 0.875 0.70833333 0.54166667 0.70833333
|
|
0.70833333 0.875 0.83333333 0.625 ]
|
|
|
|
mean value: 0.7583333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.83333333 0.8 0.5 0.42105263 0.58823529
|
|
0.5625 0.75 0.71428571 0.5 ]
|
|
|
|
mean value: 0.6312264116172785
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09728909 0.0968554 0.10155797 0.09642196 0.09648037 0.0966258
|
|
0.09753299 0.09809136 0.09761524 0.09700441]
|
|
|
|
mean value: 0.09754745960235596
|
|
|
|
key: score_time
|
|
value: [0.01724434 0.01726818 0.01727366 0.01733685 0.01723623 0.01715851
|
|
0.01735926 0.01729465 0.01727057 0.01740146]
|
|
|
|
mean value: 0.017284369468688963
|
|
|
|
key: test_mcc
|
|
value: [0.75261781 0.45834925 0.6761234 0.27500955 0.41812101 0.53033009
|
|
0.77459667 0.58536941 0.91986621 0.41812101]
|
|
|
|
mean value: 0.5808504393563012
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.70833333 0.83333333 0.625 0.70833333 0.75
|
|
0.875 0.79166667 0.95833333 0.70833333]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.63157895 0.84615385 0.68965517 0.72 0.78571429
|
|
0.85714286 0.8 0.95652174 0.72 ]
|
|
|
|
mean value: 0.7876332065314942
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.85714286 0.78571429 0.58823529 0.69230769 0.6875
|
|
1. 0.76923077 1. 0.69230769]
|
|
|
|
mean value: 0.7981529499911852
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.5 0.91666667 0.83333333 0.75 0.91666667
|
|
0.75 0.83333333 0.91666667 0.75 ]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.70833333 0.83333333 0.625 0.70833333 0.75
|
|
0.875 0.79166667 0.95833333 0.70833333]
|
|
|
|
mean value: 0.7833333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.46153846 0.73333333 0.52631579 0.5625 0.64705882
|
|
0.75 0.66666667 0.91666667 0.5625 ]
|
|
|
|
mean value: 0.6595810510438993
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01035261 0.00976276 0.00918221 0.00890946 0.00899911 0.0090673
|
|
0.00910568 0.00909877 0.00917673 0.00915146]
|
|
|
|
mean value: 0.00928061008453369
|
|
|
|
key: score_time
|
|
value: [0.00932956 0.00861287 0.00856042 0.00861096 0.00864172 0.00856161
|
|
0.00855303 0.00860214 0.00854778 0.00869441]
|
|
|
|
mean value: 0.008671450614929199
|
|
|
|
key: test_mcc
|
|
value: [ 0.2508726 0.43033148 0.35355339 -0.0836242 0.3380617 0.41812101
|
|
0.3380617 0.33333333 0.38490018 0.3380617 ]
|
|
|
|
mean value: 0.3101672898977476
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.70833333 0.66666667 0.45833333 0.66666667 0.70833333
|
|
0.66666667 0.66666667 0.66666667 0.66666667]
|
|
|
|
mean value: 0.65
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.64 0.74074074 0.6 0.48 0.69230769 0.69565217
|
|
0.63636364 0.66666667 0.55555556 0.69230769]
|
|
|
|
mean value: 0.6399594157855027
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.61538462 0.66666667 0.75 0.46153846 0.64285714 0.72727273
|
|
0.7 0.66666667 0.83333333 0.64285714]
|
|
|
|
mean value: 0.6706576756576756
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.83333333 0.5 0.5 0.75 0.66666667
|
|
0.58333333 0.66666667 0.41666667 0.75 ]
|
|
|
|
mean value: 0.6333333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.70833333 0.66666667 0.45833333 0.66666667 0.70833333
|
|
0.66666667 0.66666667 0.66666667 0.66666667]
|
|
|
|
mean value: 0.65
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.47058824 0.58823529 0.42857143 0.31578947 0.52941176 0.53333333
|
|
0.46666667 0.5 0.38461538 0.52941176]
|
|
|
|
mean value: 0.47466233456945534
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.09
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.31476283 1.30447841 1.46462393 1.34692645 1.30058432 1.30353808
|
|
1.35730481 1.30286336 1.30742908 1.2984364 ]
|
|
|
|
mean value: 1.3300947666168212
|
|
|
|
key: score_time
|
|
value: [0.08968663 0.0898366 0.09236312 0.09146023 0.08972764 0.0894835
|
|
0.09211373 0.08971739 0.09021425 0.09686351]
|
|
|
|
mean value: 0.09114665985107422
|
|
|
|
key: test_mcc
|
|
value: [0.77459667 0.45834925 0.64168895 0.43033148 0.60246408 0.2508726
|
|
0.70710678 0.66666667 0.91986621 0.33333333]
|
|
|
|
mean value: 0.5785276019860456
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.70833333 0.79166667 0.70833333 0.79166667 0.625
|
|
0.83333333 0.83333333 0.95833333 0.66666667]
|
|
|
|
mean value: 0.7791666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.63157895 0.82758621 0.74074074 0.81481481 0.60869565
|
|
0.8 0.83333333 0.95652174 0.66666667]
|
|
|
|
mean value: 0.7737080958267734
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.85714286 0.70588235 0.66666667 0.73333333 0.63636364
|
|
1. 0.83333333 1. 0.66666667]
|
|
|
|
mean value: 0.809938884644767
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 1. 0.83333333 0.91666667 0.58333333
|
|
0.66666667 0.83333333 0.91666667 0.66666667]
|
|
|
|
mean value: 0.7666666666666666
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.70833333 0.79166667 0.70833333 0.79166667 0.625
|
|
0.83333333 0.83333333 0.95833333 0.66666667]
|
|
|
|
mean value: 0.7791666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.75 0.46153846 0.70588235 0.58823529 0.6875 0.4375
|
|
0.66666667 0.71428571 0.91666667 0.5 ]
|
|
|
|
mean value: 0.6428275156216333
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.90877461 0.96970248 0.89637876 0.99821568 0.90704012 0.97669077
|
|
0.92202234 0.96597815 0.96081018 0.92725229]
|
|
|
|
mean value: 0.9432865381240845
|
|
|
|
key: score_time
|
|
value: [0.23616505 0.23439336 0.24036169 0.15945172 0.21677756 0.21420693
|
|
0.23975849 0.24806404 0.22143126 0.23800039]
|
|
|
|
mean value: 0.2248610496520996
|
|
|
|
key: test_mcc
|
|
value: [0.77459667 0.53033009 0.64168895 0.50709255 0.6761234 0.3380617
|
|
0.77459667 0.75261781 0.84515425 0.50709255]
|
|
|
|
mean value: 0.6347354647375963
|
|
|
|
key: train_mcc
|
|
value: [0.90803041 0.89849486 0.89849486 0.90756304 0.95374459 0.89818665
|
|
0.89911222 0.91702052 0.89911222 0.90756304]
|
|
|
|
mean value: 0.9087322406621408
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.75 0.79166667 0.75 0.83333333 0.66666667
|
|
0.875 0.875 0.91666667 0.75 ]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.9537037 0.94907407 0.94907407 0.9537037 0.97685185 0.94907407
|
|
0.94907407 0.95833333 0.94907407 0.9537037 ]
|
|
|
|
mean value: 0.9541666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.7 0.82758621 0.76923077 0.84615385 0.63636364
|
|
0.85714286 0.86956522 0.90909091 0.76923077]
|
|
|
|
mean value: 0.8041507068643501
|
|
|
|
key: train_fscore
|
|
value: [0.95454545 0.94977169 0.94977169 0.95412844 0.97674419 0.94930876
|
|
0.95022624 0.95890411 0.95022624 0.95412844]
|
|
|
|
mean value: 0.9547755254358538
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 0.70588235 0.71428571 0.78571429 0.7
|
|
1. 0.90909091 1. 0.71428571]
|
|
|
|
mean value: 0.84042589763178
|
|
|
|
key: train_precision
|
|
value: [0.9375 0.93693694 0.93693694 0.94545455 0.98130841 0.94495413
|
|
0.92920354 0.94594595 0.92920354 0.94545455]
|
|
|
|
mean value: 0.9432898530030248
|
|
|
|
key: test_recall
|
|
value: [0.75 0.58333333 1. 0.83333333 0.91666667 0.58333333
|
|
0.75 0.83333333 0.83333333 0.83333333]
|
|
|
|
mean value: 0.7916666666666667
|
|
|
|
key: train_recall
|
|
value: [0.97222222 0.96296296 0.96296296 0.96296296 0.97222222 0.9537037
|
|
0.97222222 0.97222222 0.97222222 0.96296296]
|
|
|
|
mean value: 0.9666666666666667
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.75 0.79166667 0.75 0.83333333 0.66666667
|
|
0.875 0.875 0.91666667 0.75 ]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.9537037 0.94907407 0.94907407 0.9537037 0.97685185 0.94907407
|
|
0.94907407 0.95833333 0.94907407 0.9537037 ]
|
|
|
|
mean value: 0.9541666666666666
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.53846154 0.70588235 0.625 0.73333333 0.46666667
|
|
0.75 0.76923077 0.83333333 0.625 ]
|
|
|
|
mean value: 0.6796907993966818
|
|
|
|
key: train_jcc
|
|
value: [0.91304348 0.90434783 0.90434783 0.9122807 0.95454545 0.90350877
|
|
0.90517241 0.92105263 0.90517241 0.9122807 ]
|
|
|
|
mean value: 0.9135752219583988
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01139498 0.01070619 0.01048851 0.01043177 0.01021743 0.00961018
|
|
0.0094676 0.01010823 0.00943756 0.0102725 ]
|
|
|
|
mean value: 0.010213494300842285
|
|
|
|
key: score_time
|
|
value: [0.00988126 0.00973415 0.00977278 0.00937963 0.00891113 0.00978112
|
|
0.00912976 0.00887871 0.00883675 0.0089314 ]
|
|
|
|
mean value: 0.009323668479919434
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.43033148 0.58536941 0.57735027 0.2508726 0.3380617
|
|
0.58536941 0.5 0.60246408 0.6761234 ]
|
|
|
|
mean value: 0.5131311757869508
|
|
|
|
key: train_mcc
|
|
value: [0.56542109 0.63957467 0.64111887 0.62103628 0.65366344 0.64023511
|
|
0.58638277 0.63355259 0.60187765 0.62361342]
|
|
|
|
mean value: 0.6206475899027868
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.70833333 0.79166667 0.75 0.625 0.66666667
|
|
0.79166667 0.75 0.79166667 0.83333333]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_accuracy
|
|
value: [0.78240741 0.81944444 0.81944444 0.81018519 0.82407407 0.81944444
|
|
0.79166667 0.81481481 0.80092593 0.81018519]
|
|
|
|
mean value: 0.8092592592592592
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.66666667 0.8 0.8 0.64 0.69230769
|
|
0.7826087 0.75 0.81481481 0.84615385]
|
|
|
|
mean value: 0.7575160411247368
|
|
|
|
key: train_fscore
|
|
value: [0.78733032 0.82352941 0.82666667 0.81447964 0.83478261 0.82511211
|
|
0.80176211 0.8245614 0.8 0.81938326]
|
|
|
|
mean value: 0.8157607527459586
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.77777778 0.76923077 0.66666667 0.61538462 0.64285714
|
|
0.81818182 0.75 0.73333333 0.78571429]
|
|
|
|
mean value: 0.7377328227328227
|
|
|
|
key: train_precision
|
|
value: [0.7699115 0.80530973 0.79487179 0.79646018 0.78688525 0.8
|
|
0.76470588 0.78333333 0.80373832 0.78151261]
|
|
|
|
mean value: 0.7886728595187938
|
|
|
|
key: test_recall
|
|
value: [0.75 0.58333333 0.83333333 1. 0.66666667 0.75
|
|
0.75 0.75 0.91666667 0.91666667]
|
|
|
|
mean value: 0.7916666666666666
|
|
|
|
key: train_recall
|
|
value: [0.80555556 0.84259259 0.86111111 0.83333333 0.88888889 0.85185185
|
|
0.84259259 0.87037037 0.7962963 0.86111111]
|
|
|
|
mean value: 0.8453703703703703
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.70833333 0.79166667 0.75 0.625 0.66666667
|
|
0.79166667 0.75 0.79166667 0.83333333]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_roc_auc
|
|
value: [0.78240741 0.81944444 0.81944444 0.81018519 0.82407407 0.81944444
|
|
0.79166667 0.81481481 0.80092593 0.81018519]
|
|
|
|
mean value: 0.8092592592592592
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.5 0.66666667 0.66666667 0.47058824 0.52941176
|
|
0.64285714 0.6 0.6875 0.73333333]
|
|
|
|
mean value: 0.6139880952380953
|
|
|
|
key: train_jcc
|
|
value: [0.64925373 0.7 0.70454545 0.6870229 0.71641791 0.70229008
|
|
0.66911765 0.70149254 0.66666667 0.69402985]
|
|
|
|
mean value: 0.6890836775220928
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.09627533 0.07803988 0.06252694 0.06555963 0.06782389 0.06903291
|
|
0.24227285 0.05894995 0.06242561 0.07130003]
|
|
|
|
mean value: 0.08742070198059082
|
|
|
|
key: score_time
|
|
value: [0.01102471 0.01105189 0.01042008 0.01050401 0.01191854 0.01149631
|
|
0.01142263 0.01128268 0.01066589 0.01059556]
|
|
|
|
mean value: 0.01103823184967041
|
|
|
|
key: test_mcc
|
|
value: [0.84515425 0.53033009 0.64168895 0.53033009 0.6761234 0.41812101
|
|
0.75261781 0.58536941 0.83333333 0.5 ]
|
|
|
|
mean value: 0.6313068332559123
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91666667 0.75 0.79166667 0.75 0.83333333 0.70833333
|
|
0.875 0.79166667 0.91666667 0.75 ]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.7 0.82758621 0.78571429 0.84615385 0.72
|
|
0.86956522 0.7826087 0.91666667 0.75 ]
|
|
|
|
mean value: 0.8107385827565737
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 0.70588235 0.6875 0.78571429 0.69230769
|
|
0.90909091 0.81818182 0.91666667 0.75 ]
|
|
|
|
mean value: 0.8140343724902548
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.58333333 1. 0.91666667 0.91666667 0.75
|
|
0.83333333 0.75 0.91666667 0.75 ]
|
|
|
|
mean value: 0.825
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.91666667 0.75 0.79166667 0.75 0.83333333 0.70833333
|
|
0.875 0.79166667 0.91666667 0.75 ]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.53846154 0.70588235 0.64705882 0.73333333 0.5625
|
|
0.76923077 0.64285714 0.84615385 0.6 ]
|
|
|
|
mean value: 0.6878811139840552
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04071808 0.06085467 0.0598805 0.06004024 0.06288719 0.059587
|
|
0.05953264 0.0594027 0.0682199 0.05782318]
|
|
|
|
mean value: 0.05889461040496826
|
|
|
|
key: score_time
|
|
value: [0.02352667 0.02167249 0.02408361 0.02246499 0.02260733 0.02096868
|
|
0.02389741 0.02272534 0.01970649 0.0183506 ]
|
|
|
|
mean value: 0.0220003604888916
|
|
|
|
key: test_mcc
|
|
value: [ 0.58536941 0.5 0.50709255 0.41812101 -0.0836242 0.3380617
|
|
0.60246408 0.66666667 0.6761234 0.5 ]
|
|
|
|
mean value: 0.47102746122625055
|
|
|
|
key: train_mcc
|
|
value: [0.94460643 0.95374459 0.93554619 0.96362411 0.98164982 0.93522528
|
|
0.95407186 0.95374459 0.96296296 0.97259753]
|
|
|
|
mean value: 0.9557773347439162
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.75 0.75 0.70833333 0.45833333 0.66666667
|
|
0.79166667 0.83333333 0.83333333 0.75 ]
|
|
|
|
mean value: 0.7333333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.97222222 0.97685185 0.96759259 0.98148148 0.99074074 0.96759259
|
|
0.97685185 0.97685185 0.98148148 0.98611111]
|
|
|
|
mean value: 0.9777777777777777
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.75 0.76923077 0.72 0.43478261 0.69230769
|
|
0.81481481 0.83333333 0.84615385 0.75 ]
|
|
|
|
mean value: 0.7410623064536108
|
|
|
|
key: train_fscore
|
|
value: [0.97247706 0.97674419 0.96803653 0.98181818 0.99082569 0.96774194
|
|
0.97716895 0.97695853 0.98148148 0.98630137]
|
|
|
|
mean value: 0.9779553911784314
|
|
|
|
key: test_precision
|
|
value: [0.76923077 0.75 0.71428571 0.69230769 0.45454545 0.64285714
|
|
0.73333333 0.83333333 0.78571429 0.75 ]
|
|
|
|
mean value: 0.7125607725607725
|
|
|
|
key: train_precision
|
|
value: [0.96363636 0.98130841 0.95495495 0.96428571 0.98181818 0.96330275
|
|
0.96396396 0.97247706 0.98148148 0.97297297]
|
|
|
|
mean value: 0.9700201860842348
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.75 0.83333333 0.75 0.41666667 0.75
|
|
0.91666667 0.83333333 0.91666667 0.75 ]
|
|
|
|
mean value: 0.775
|
|
|
|
key: train_recall
|
|
value: [0.98148148 0.97222222 0.98148148 1. 1. 0.97222222
|
|
0.99074074 0.98148148 0.98148148 1. ]
|
|
|
|
mean value: 0.9861111111111112
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.75 0.75 0.70833333 0.45833333 0.66666667
|
|
0.79166667 0.83333333 0.83333333 0.75 ]
|
|
|
|
mean value: 0.7333333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.97222222 0.97685185 0.96759259 0.98148148 0.99074074 0.96759259
|
|
0.97685185 0.97685185 0.98148148 0.98611111]
|
|
|
|
mean value: 0.9777777777777779
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.6 0.625 0.5625 0.27777778 0.52941176
|
|
0.6875 0.71428571 0.73333333 0.6 ]
|
|
|
|
mean value: 0.5996475256769375
|
|
|
|
key: train_jcc
|
|
value: [0.94642857 0.95454545 0.9380531 0.96428571 0.98181818 0.9375
|
|
0.95535714 0.95495495 0.96363636 0.97297297]
|
|
|
|
mean value: 0.956955245384449
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01308775 0.01544952 0.01027894 0.00918627 0.0089798 0.00915718
|
|
0.00889015 0.00921917 0.00905752 0.00887823]
|
|
|
|
mean value: 0.010218453407287598
|
|
|
|
key: score_time
|
|
value: [0.0117414 0.00944948 0.00892258 0.00871181 0.008641 0.00863171
|
|
0.00862813 0.0086782 0.00863409 0.00874138]
|
|
|
|
mean value: 0.009077978134155274
|
|
|
|
key: test_mcc
|
|
value: [0.43033148 0.58536941 0.41812101 0.45834925 0.38490018 0.58536941
|
|
0.5 0.35355339 0.3380617 0.66666667]
|
|
|
|
mean value: 0.4720722489050611
|
|
|
|
key: train_mcc
|
|
value: [0.4472136 0.50557897 0.50709255 0.47111148 0.52174919 0.46812868
|
|
0.47684381 0.49433502 0.4406788 0.48685383]
|
|
|
|
mean value: 0.4819585924926295
|
|
|
|
key: test_accuracy
|
|
value: [0.70833333 0.79166667 0.70833333 0.70833333 0.66666667 0.79166667
|
|
0.75 0.66666667 0.66666667 0.83333333]
|
|
|
|
mean value: 0.7291666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.72222222 0.75 0.75 0.73148148 0.75925926 0.73148148
|
|
0.73611111 0.74537037 0.71759259 0.74074074]
|
|
|
|
mean value: 0.7384259259259259
|
|
|
|
key: test_fscore
|
|
value: [0.74074074 0.7826087 0.72 0.75862069 0.73333333 0.8
|
|
0.75 0.71428571 0.63636364 0.83333333]
|
|
|
|
mean value: 0.7469286143364104
|
|
|
|
key: train_fscore
|
|
value: [0.73684211 0.76724138 0.76923077 0.75423729 0.77192982 0.75
|
|
0.75324675 0.75982533 0.73819742 0.75862069]
|
|
|
|
mean value: 0.7559371561806815
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.81818182 0.69230769 0.64705882 0.61111111 0.76923077
|
|
0.75 0.625 0.7 0.83333333]
|
|
|
|
mean value: 0.7112890214360803
|
|
|
|
key: train_precision
|
|
value: [0.7 0.71774194 0.71428571 0.6953125 0.73333333 0.7016129
|
|
0.70731707 0.71900826 0.688 0.70967742]
|
|
|
|
mean value: 0.7086289143317105
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.75 0.75 0.91666667 0.91666667 0.83333333
|
|
0.75 0.83333333 0.58333333 0.83333333]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_recall
|
|
value: [0.77777778 0.82407407 0.83333333 0.82407407 0.81481481 0.80555556
|
|
0.80555556 0.80555556 0.7962963 0.81481481]
|
|
|
|
mean value: 0.8101851851851852
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.79166667 0.70833333 0.70833333 0.66666667 0.79166667
|
|
0.75 0.66666667 0.66666667 0.83333333]
|
|
|
|
mean value: 0.7291666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [0.72222222 0.75 0.75 0.73148148 0.75925926 0.73148148
|
|
0.73611111 0.74537037 0.71759259 0.74074074]
|
|
|
|
mean value: 0.7384259259259259
|
|
|
|
key: test_jcc
|
|
value: [0.58823529 0.64285714 0.5625 0.61111111 0.57894737 0.66666667
|
|
0.6 0.55555556 0.46666667 0.71428571]
|
|
|
|
mean value: 0.5986825519681557
|
|
|
|
key: train_jcc
|
|
value: [0.58333333 0.62237762 0.625 0.60544218 0.62857143 0.6
|
|
0.60416667 0.61267606 0.58503401 0.61111111]
|
|
|
|
mean value: 0.6077712408874381
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01085949 0.01415586 0.01646209 0.01394892 0.0151155 0.01408863
|
|
0.01640916 0.0158937 0.01625681 0.01627421]
|
|
|
|
mean value: 0.014946436882019043
|
|
|
|
key: score_time
|
|
value: [0.00876379 0.01124716 0.01204467 0.01177979 0.01172805 0.01171541
|
|
0.01189089 0.01158404 0.01160598 0.01158595]
|
|
|
|
mean value: 0.011394572257995606
|
|
|
|
key: test_mcc
|
|
value: [0.60246408 0.58536941 0.53033009 0.4472136 0.35355339 0.2508726
|
|
0.5 0.60246408 0.58536941 0.50709255]
|
|
|
|
mean value: 0.4964729193985725
|
|
|
|
key: train_mcc
|
|
value: [0.81145561 0.71739923 0.76459339 0.64168895 0.66332496 0.77898084
|
|
0.80235109 0.70238053 0.77253603 0.87996919]
|
|
|
|
mean value: 0.7534679801942501
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.79166667 0.75 0.66666667 0.66666667 0.625
|
|
0.75 0.79166667 0.79166667 0.75 ]
|
|
|
|
mean value: 0.7374999999999999
|
|
|
|
key: train_accuracy
|
|
value: [0.90277778 0.84259259 0.875 0.79166667 0.80555556 0.88888889
|
|
0.89351852 0.83333333 0.88425926 0.93981481]
|
|
|
|
mean value: 0.8657407407407407
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.7826087 0.78571429 0.75 0.6 0.64
|
|
0.75 0.76190476 0.8 0.72727273]
|
|
|
|
mean value: 0.735940523244871
|
|
|
|
key: train_fscore
|
|
value: [0.89655172 0.86290323 0.86153846 0.82758621 0.75862069 0.89189189
|
|
0.90295359 0.8021978 0.87804878 0.93896714]
|
|
|
|
mean value: 0.8621259505260193
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.81818182 0.6875 0.6 0.75 0.61538462
|
|
0.75 0.88888889 0.76923077 0.8 ]
|
|
|
|
mean value: 0.756807498057498
|
|
|
|
key: train_precision
|
|
value: [0.95789474 0.76428571 0.96551724 0.70588235 1. 0.86842105
|
|
0.82945736 0.98648649 0.92783505 0.95238095]
|
|
|
|
mean value: 0.8958160952834802
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.75 0.91666667 1. 0.5 0.66666667
|
|
0.75 0.66666667 0.83333333 0.66666667]
|
|
|
|
mean value: 0.7416666666666667
|
|
|
|
key: train_recall
|
|
value: [0.84259259 0.99074074 0.77777778 1. 0.61111111 0.91666667
|
|
0.99074074 0.67592593 0.83333333 0.92592593]
|
|
|
|
mean value: 0.8564814814814815
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.79166667 0.75 0.66666667 0.66666667 0.625
|
|
0.75 0.79166667 0.79166667 0.75 ]
|
|
|
|
mean value: 0.7374999999999999
|
|
|
|
key: train_roc_auc
|
|
value: [0.90277778 0.84259259 0.875 0.79166667 0.80555556 0.88888889
|
|
0.89351852 0.83333333 0.88425926 0.93981481]
|
|
|
|
mean value: 0.8657407407407407
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.64285714 0.64705882 0.6 0.42857143 0.47058824
|
|
0.6 0.61538462 0.66666667 0.57142857]
|
|
|
|
mean value: 0.585794009911657
|
|
|
|
key: train_jcc
|
|
value: [0.8125 0.75886525 0.75675676 0.70588235 0.61111111 0.80487805
|
|
0.82307692 0.66972477 0.7826087 0.88495575]
|
|
|
|
mean value: 0.7610359659400171
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01570725 0.01456785 0.01438451 0.01643419 0.01524568 0.01557255
|
|
0.01395392 0.01653457 0.01462817 0.01384068]
|
|
|
|
mean value: 0.015086936950683593
|
|
|
|
key: score_time
|
|
value: [0.01181507 0.01158404 0.01152992 0.01155901 0.01153064 0.01153803
|
|
0.01155543 0.01160192 0.01161337 0.01156521]
|
|
|
|
mean value: 0.011589264869689942
|
|
|
|
key: test_mcc
|
|
value: [0.57735027 0.6761234 0.64168895 0.53033009 0.43033148 0.33333333
|
|
0.64168895 0.4472136 0.77459667 0.50709255]
|
|
|
|
mean value: 0.5559749288525665
|
|
|
|
key: train_mcc
|
|
value: [0.62017367 0.72861674 0.54167626 0.89911222 0.74428277 0.85243671
|
|
0.75734016 0.42465029 0.79697229 0.6824715 ]
|
|
|
|
mean value: 0.7047732619322473
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.83333333 0.79166667 0.75 0.70833333 0.66666667
|
|
0.79166667 0.66666667 0.875 0.75 ]
|
|
|
|
mean value: 0.7583333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.77777778 0.85185185 0.72685185 0.94907407 0.85648148 0.92592593
|
|
0.875 0.65277778 0.89351852 0.8287037 ]
|
|
|
|
mean value: 0.8337962962962963
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.81818182 0.73684211 0.78571429 0.66666667 0.66666667
|
|
0.73684211 0.5 0.88888889 0.72727273]
|
|
|
|
mean value: 0.7327075263917369
|
|
|
|
key: train_fscore
|
|
value: [0.81818182 0.86885246 0.62420382 0.95022624 0.83243243 0.92727273
|
|
0.86567164 0.46808511 0.90128755 0.80213904]
|
|
|
|
mean value: 0.805835284215856
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.9 1. 0.6875 0.77777778 0.66666667
|
|
1. 1. 0.8 0.8 ]
|
|
|
|
mean value: 0.8298611111111112
|
|
|
|
key: train_precision
|
|
value: [0.69230769 0.77941176 1. 0.92920354 1. 0.91071429
|
|
0.93548387 1. 0.84 0.94936709]
|
|
|
|
mean value: 0.9036488242126206
|
|
|
|
key: test_recall
|
|
value: [1. 0.75 0.58333333 0.91666667 0.58333333 0.66666667
|
|
0.58333333 0.33333333 1. 0.66666667]
|
|
|
|
mean value: 0.7083333333333334
|
|
|
|
key: train_recall
|
|
value: [1. 0.98148148 0.4537037 0.97222222 0.71296296 0.94444444
|
|
0.80555556 0.30555556 0.97222222 0.69444444]
|
|
|
|
mean value: 0.7842592592592592
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.83333333 0.79166667 0.75 0.70833333 0.66666667
|
|
0.79166667 0.66666667 0.875 0.75 ]
|
|
|
|
mean value: 0.7583333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.77777778 0.85185185 0.72685185 0.94907407 0.85648148 0.92592593
|
|
0.875 0.65277778 0.89351852 0.8287037 ]
|
|
|
|
mean value: 0.8337962962962963
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.69230769 0.58333333 0.64705882 0.5 0.5
|
|
0.58333333 0.33333333 0.8 0.57142857]
|
|
|
|
mean value: 0.5877461753932343
|
|
|
|
key: train_jcc
|
|
value: [0.69230769 0.76811594 0.4537037 0.90517241 0.71296296 0.86440678
|
|
0.76315789 0.30555556 0.8203125 0.66964286]
|
|
|
|
mean value: 0.695533830189272
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.133183 0.11564565 0.11664176 0.11746097 0.11651921 0.11830211
|
|
0.11635089 0.11619401 0.11829281 0.11623812]
|
|
|
|
mean value: 0.1184828519821167
|
|
|
|
key: score_time
|
|
value: [0.01493979 0.01472211 0.0150187 0.01492405 0.01475787 0.0148735
|
|
0.01482558 0.01505041 0.01577067 0.01480699]
|
|
|
|
mean value: 0.014968967437744141
|
|
|
|
key: test_mcc
|
|
value: [0.66666667 0.57735027 0.70710678 0.3380617 0.3380617 0.43033148
|
|
0.6761234 0.58536941 0.75261781 0.50709255]
|
|
|
|
mean value: 0.5578781776368856
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.75 0.83333333 0.66666667 0.66666667 0.70833333
|
|
0.83333333 0.79166667 0.875 0.75 ]
|
|
|
|
mean value: 0.7708333333333334
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.66666667 0.85714286 0.69230769 0.69230769 0.74074074
|
|
0.81818182 0.7826087 0.88 0.72727273]
|
|
|
|
mean value: 0.7690562223605701
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 1. 0.75 0.64285714 0.64285714 0.66666667
|
|
0.9 0.81818182 0.84615385 0.8 ]
|
|
|
|
mean value: 0.790004995004995
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.5 1. 0.75 0.75 0.83333333
|
|
0.75 0.75 0.91666667 0.66666667]
|
|
|
|
mean value: 0.775
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.75 0.83333333 0.66666667 0.66666667 0.70833333
|
|
0.83333333 0.79166667 0.875 0.75 ]
|
|
|
|
mean value: 0.7708333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.5 0.75 0.52941176 0.52941176 0.58823529
|
|
0.69230769 0.64285714 0.78571429 0.57142857]
|
|
|
|
mean value: 0.6303652230122818
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04627037 0.04262686 0.04359245 0.05944014 0.0541997 0.05315614
|
|
0.05596638 0.05864263 0.05465889 0.06446767]
|
|
|
|
mean value: 0.05330212116241455
|
|
|
|
key: score_time
|
|
value: [0.01904321 0.0231092 0.02482772 0.02747893 0.02421951 0.02360153
|
|
0.02202749 0.03683686 0.02723002 0.04126835]
|
|
|
|
mean value: 0.026964282989501952
|
|
|
|
key: test_mcc
|
|
value: [0.60246408 0.43033148 0.50709255 0.58536941 0.3380617 0.16666667
|
|
0.43033148 0.60246408 0.91986621 0.41812101]
|
|
|
|
mean value: 0.5000768662388781
|
|
|
|
key: train_mcc
|
|
value: [0.97259753 0.98164982 0.98164982 0.96362411 0.98164982 0.96312812
|
|
0.98164982 0.97259753 0.98164982 0.99078321]
|
|
|
|
mean value: 0.9770979584094492
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.70833333 0.75 0.79166667 0.66666667 0.58333333
|
|
0.70833333 0.79166667 0.95833333 0.70833333]
|
|
|
|
mean value: 0.7458333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.98611111 0.99074074 0.99074074 0.98148148 0.99074074 0.98148148
|
|
0.99074074 0.98611111 0.99074074 0.99537037]
|
|
|
|
mean value: 0.9884259259259259
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.66666667 0.76923077 0.8 0.63636364 0.58333333
|
|
0.66666667 0.76190476 0.95652174 0.69565217]
|
|
|
|
mean value: 0.7298244509114075
|
|
|
|
key: train_fscore
|
|
value: [0.98591549 0.99065421 0.99065421 0.98113208 0.99065421 0.98130841
|
|
0.99065421 0.98591549 0.99065421 0.99534884]
|
|
|
|
mean value: 0.988289133784883
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.77777778 0.71428571 0.76923077 0.7 0.58333333
|
|
0.77777778 0.88888889 1. 0.72727273]
|
|
|
|
mean value: 0.7827455877455878
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 0.99056604
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9990566037735849
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.58333333 0.83333333 0.83333333 0.58333333 0.58333333
|
|
0.58333333 0.66666667 0.91666667 0.66666667]
|
|
|
|
mean value: 0.6916666666666667
|
|
|
|
key: train_recall
|
|
value: [0.97222222 0.98148148 0.98148148 0.96296296 0.98148148 0.97222222
|
|
0.98148148 0.97222222 0.98148148 0.99074074]
|
|
|
|
mean value: 0.9777777777777777
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.70833333 0.75 0.79166667 0.66666667 0.58333333
|
|
0.70833333 0.79166667 0.95833333 0.70833333]
|
|
|
|
mean value: 0.7458333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.98611111 0.99074074 0.99074074 0.98148148 0.99074074 0.98148148
|
|
0.99074074 0.98611111 0.99074074 0.99537037]
|
|
|
|
mean value: 0.9884259259259259
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.5 0.625 0.66666667 0.46666667 0.41176471
|
|
0.5 0.61538462 0.91666667 0.53333333]
|
|
|
|
mean value: 0.5850867269984917
|
|
|
|
key: train_jcc
|
|
value: [0.97222222 0.98148148 0.98148148 0.96296296 0.98148148 0.96330275
|
|
0.98148148 0.97222222 0.98148148 0.99074074]
|
|
|
|
mean value: 0.9768858307849133
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04280233 0.0681746 0.06288624 0.06788301 0.042377 0.02782488
|
|
0.0268805 0.04071689 0.02718902 0.06667638]
|
|
|
|
mean value: 0.04734108448028564
|
|
|
|
key: score_time
|
|
value: [0.02413058 0.02410936 0.02109694 0.02472234 0.01284194 0.01278472
|
|
0.01273441 0.01275396 0.01282382 0.01282883]
|
|
|
|
mean value: 0.017082691192626953
|
|
|
|
key: test_mcc
|
|
value: [0.58536941 0.41812101 0.41812101 0.1767767 0.3380617 0.1767767
|
|
0.5 0.66666667 0.6761234 0.41812101]
|
|
|
|
mean value: 0.437413758494976
|
|
|
|
key: train_mcc
|
|
value: [0.99078321 0.99078321 0.99078321 0.99078321 1. 0.99078321
|
|
0.99078321 0.99078321 0.99078321 0.98164982]
|
|
|
|
mean value: 0.9907915525187407
|
|
|
|
key: test_accuracy
|
|
value: [0.79166667 0.70833333 0.70833333 0.58333333 0.66666667 0.58333333
|
|
0.75 0.83333333 0.83333333 0.70833333]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.99537037 0.99537037 0.99537037 0.99537037 1. 0.99537037
|
|
0.99537037 0.99537037 0.99537037 0.99074074]
|
|
|
|
mean value: 0.9953703703703703
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.69565217 0.69565217 0.64285714 0.69230769 0.64285714
|
|
0.75 0.83333333 0.84615385 0.72 ]
|
|
|
|
mean value: 0.7301422200987419
|
|
|
|
key: train_fscore
|
|
value: [0.99539171 0.99539171 0.99539171 0.99539171 1. 0.99539171
|
|
0.99539171 0.99539171 0.99539171 0.99082569]
|
|
|
|
mean value: 0.9953959328626389
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.72727273 0.72727273 0.5625 0.64285714 0.5625
|
|
0.75 0.83333333 0.78571429 0.69230769]
|
|
|
|
mean value: 0.7101939726939727
|
|
|
|
key: train_precision
|
|
value: [0.99082569 0.99082569 0.99082569 0.99082569 1. 0.99082569
|
|
0.99082569 0.99082569 0.99082569 0.98181818]
|
|
|
|
mean value: 0.9908423686405339
|
|
|
|
key: test_recall
|
|
value: [0.75 0.66666667 0.66666667 0.75 0.75 0.75
|
|
0.75 0.83333333 0.91666667 0.75 ]
|
|
|
|
mean value: 0.7583333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.70833333 0.70833333 0.58333333 0.66666667 0.58333333
|
|
0.75 0.83333333 0.83333333 0.70833333]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.99537037 0.99537037 0.99537037 0.99537037 1. 0.99537037
|
|
0.99537037 0.99537037 0.99537037 0.99074074]
|
|
|
|
mean value: 0.9953703703703703
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.53333333 0.53333333 0.47368421 0.52941176 0.47368421
|
|
0.6 0.71428571 0.73333333 0.5625 ]
|
|
|
|
mean value: 0.5796423042901371
|
|
|
|
key: train_jcc
|
|
value: [0.99082569 0.99082569 0.99082569 0.99082569 1. 0.99082569
|
|
0.99082569 0.99082569 0.99082569 0.98181818]
|
|
|
|
mean value: 0.9908423686405339
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.40733719 0.3867228 0.38469768 0.3909452 0.38922071 0.39397931
|
|
0.38517714 0.38699746 0.38558316 0.38554788]
|
|
|
|
mean value: 0.38962085247039796
|
|
|
|
key: score_time
|
|
value: [0.0094502 0.00933933 0.00938606 0.00928497 0.00938368 0.01016212
|
|
0.00926542 0.00927734 0.00943732 0.00922227]
|
|
|
|
mean value: 0.00942087173461914
|
|
|
|
key: test_mcc
|
|
value: [0.83333333 0.53033009 0.64168895 0.6761234 0.58536941 0.5
|
|
0.75261781 0.6761234 0.91986621 0.58536941]
|
|
|
|
mean value: 0.6700822008732727
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91666667 0.75 0.79166667 0.83333333 0.79166667 0.75
|
|
0.875 0.83333333 0.95833333 0.79166667]
|
|
|
|
mean value: 0.8291666666666666
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.91666667 0.7 0.82758621 0.84615385 0.8 0.75
|
|
0.86956522 0.81818182 0.96 0.7826087 ]
|
|
|
|
mean value: 0.8270762450942362
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.91666667 0.875 0.70588235 0.78571429 0.76923077 0.75
|
|
0.90909091 0.9 0.92307692 0.81818182]
|
|
|
|
mean value: 0.8352843724902549
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.91666667 0.58333333 1. 0.91666667 0.83333333 0.75
|
|
0.83333333 0.75 1. 0.75 ]
|
|
|
|
mean value: 0.8333333333333334
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.91666667 0.75 0.79166667 0.83333333 0.79166667 0.75
|
|
0.875 0.83333333 0.95833333 0.79166667]
|
|
|
|
mean value: 0.8291666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.84615385 0.53846154 0.70588235 0.73333333 0.66666667 0.6
|
|
0.76923077 0.69230769 0.92307692 0.64285714]
|
|
|
|
mean value: 0.7117970265029089
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02279496 0.02264142 0.02134824 0.02143693 0.02124429 0.0212903
|
|
0.02151275 0.02106762 0.02139115 0.02206588]
|
|
|
|
mean value: 0.02167935371398926
|
|
|
|
key: score_time
|
|
value: [0.01800084 0.01225257 0.01478553 0.01450801 0.0146749 0.015486
|
|
0.0145545 0.01485181 0.01477313 0.01217294]
|
|
|
|
mean value: 0.014606022834777832
|
|
|
|
key: test_mcc
|
|
value: [ 0.41812101 0.41812101 0.43033148 0. -0.0860663 0.66666667
|
|
0.41812101 0.2508726 0.6761234 0.43033148]
|
|
|
|
mean value: 0.36226223577037264
|
|
|
|
key: train_mcc
|
|
value: [1. 0.91986621 0.87777662 1. 0.89442719 1.
|
|
0.92847669 0.97259753 0.95472741 0.95472741]
|
|
|
|
mean value: 0.9502599060234108
|
|
|
|
key: test_accuracy
|
|
value: [0.70833333 0.70833333 0.70833333 0.5 0.45833333 0.83333333
|
|
0.70833333 0.625 0.83333333 0.70833333]
|
|
|
|
mean value: 0.6791666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.95833333 0.93518519 1. 0.94444444 1.
|
|
0.96296296 0.98611111 0.97685185 0.97685185]
|
|
|
|
mean value: 0.9740740740740741
|
|
|
|
key: test_fscore
|
|
value: [0.72 0.69565217 0.66666667 0.625 0.51851852 0.83333333
|
|
0.72 0.64 0.81818182 0.66666667]
|
|
|
|
mean value: 0.6904019177280046
|
|
|
|
key: train_fscore
|
|
value: [1. 0.96 0.93913043 1. 0.94736842 1.
|
|
0.96428571 0.98630137 0.97737557 0.97737557]
|
|
|
|
mean value: 0.9751837071205688
|
|
|
|
key: test_precision
|
|
value: [0.69230769 0.72727273 0.77777778 0.5 0.46666667 0.83333333
|
|
0.69230769 0.61538462 0.9 0.77777778]
|
|
|
|
mean value: 0.6982828282828283
|
|
|
|
key: train_precision
|
|
value: [1. 0.92307692 0.8852459 1. 0.9 1.
|
|
0.93103448 0.97297297 0.95575221 0.95575221]
|
|
|
|
mean value: 0.9523834705226623
|
|
|
|
key: test_recall
|
|
value: [0.75 0.66666667 0.58333333 0.83333333 0.58333333 0.83333333
|
|
0.75 0.66666667 0.75 0.58333333]
|
|
|
|
mean value: 0.7
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.70833333 0.70833333 0.5 0.45833333 0.83333333
|
|
0.70833333 0.625 0.83333333 0.70833333]
|
|
|
|
mean value: 0.6791666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.95833333 0.93518519 1. 0.94444444 1.
|
|
0.96296296 0.98611111 0.97685185 0.97685185]
|
|
|
|
mean value: 0.9740740740740741
|
|
|
|
key: test_jcc
|
|
value: [0.5625 0.53333333 0.5 0.45454545 0.35 0.71428571
|
|
0.5625 0.47058824 0.69230769 0.5 ]
|
|
|
|
mean value: 0.5340060429766312
|
|
|
|
key: train_jcc
|
|
value: [1. 0.92307692 0.8852459 1. 0.9 1.
|
|
0.93103448 0.97297297 0.95575221 0.95575221]
|
|
|
|
mean value: 0.9523834705226623
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02238536 0.03210616 0.03460526 0.03438401 0.03436708 0.03419876
|
|
0.03430533 0.03438544 0.03449702 0.03423023]
|
|
|
|
mean value: 0.03294646739959717
|
|
|
|
key: score_time
|
|
value: [0.01790905 0.02034116 0.0216105 0.02313852 0.02082276 0.02208257
|
|
0.02374363 0.02120662 0.02306151 0.02310467]
|
|
|
|
mean value: 0.021702098846435546
|
|
|
|
key: test_mcc
|
|
value: [0.6761234 0.60246408 0.60246408 0.43033148 0.2508726 0.3380617
|
|
0.6761234 0.6761234 0.60246408 0.58536941]
|
|
|
|
mean value: 0.5440397634389
|
|
|
|
key: train_mcc
|
|
value: [0.83390548 0.89849486 0.86203543 0.88057382 0.86144352 0.87996919
|
|
0.86292558 0.89133762 0.88904134 0.89026381]
|
|
|
|
mean value: 0.8749990658638971
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.79166667 0.79166667 0.70833333 0.625 0.66666667
|
|
0.83333333 0.83333333 0.79166667 0.79166667]
|
|
|
|
mean value: 0.7666666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.91666667 0.94907407 0.93055556 0.93981481 0.93055556 0.93981481
|
|
0.93055556 0.94444444 0.94444444 0.94444444]
|
|
|
|
mean value: 0.937037037037037
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.76190476 0.81481481 0.74074074 0.64 0.69230769
|
|
0.81818182 0.81818182 0.81481481 0.7826087 ]
|
|
|
|
mean value: 0.7701736974780453
|
|
|
|
key: train_fscore
|
|
value: [0.91818182 0.94977169 0.9321267 0.94117647 0.93150685 0.94063927
|
|
0.93273543 0.94642857 0.94495413 0.94594595]
|
|
|
|
mean value: 0.9383466865645663
|
|
|
|
key: test_precision
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_rt.py:195: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_rt.py:198: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.9 0.88888889 0.73333333 0.66666667 0.61538462 0.64285714
|
|
0.9 0.9 0.73333333 0.81818182]
|
|
|
|
mean value: 0.7798645798645799
|
|
|
|
key: train_precision
|
|
value: [0.90178571 0.93693694 0.91150442 0.92035398 0.91891892 0.92792793
|
|
0.90434783 0.9137931 0.93636364 0.92105263]
|
|
|
|
mean value: 0.919298510262696
|
|
|
|
key: test_recall
|
|
value: [0.75 0.66666667 0.91666667 0.83333333 0.66666667 0.75
|
|
0.75 0.75 0.91666667 0.75 ]
|
|
|
|
mean value: 0.775
|
|
|
|
key: train_recall
|
|
value: [0.93518519 0.96296296 0.9537037 0.96296296 0.94444444 0.9537037
|
|
0.96296296 0.98148148 0.9537037 0.97222222]
|
|
|
|
mean value: 0.9583333333333334
|
|
|
|
key: test_roc_auc
|
|
value: [0.83333333 0.79166667 0.79166667 0.70833333 0.625 0.66666667
|
|
0.83333333 0.83333333 0.79166667 0.79166667]
|
|
|
|
mean value: 0.7666666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [0.91666667 0.94907407 0.93055556 0.93981481 0.93055556 0.93981481
|
|
0.93055556 0.94444444 0.94444444 0.94444444]
|
|
|
|
mean value: 0.937037037037037
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.61538462 0.6875 0.58823529 0.47058824 0.52941176
|
|
0.69230769 0.69230769 0.6875 0.64285714]
|
|
|
|
mean value: 0.6298400129282482
|
|
|
|
key: train_jcc
|
|
value: [0.8487395 0.90434783 0.87288136 0.88888889 0.87179487 0.88793103
|
|
0.87394958 0.89830508 0.89565217 0.8974359 ]
|
|
|
|
mean value: 0.8839926208910636
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.23147917 0.30225873 0.2818923 0.12417746 0.19498205 0.23902297
|
|
0.23224235 0.23423767 0.19799876 0.15895414]
|
|
|
|
mean value: 0.21972455978393554
|
|
|
|
key: score_time
|
|
value: [0.02185488 0.02039647 0.02102041 0.01203465 0.02082801 0.02240419
|
|
0.02174664 0.02311754 0.0120647 0.02047658]
|
|
|
|
mean value: 0.019594407081604003
|
|
|
|
key: test_mcc
|
|
value: [0.75261781 0.60246408 0.70710678 0.53033009 0.16666667 0.33333333
|
|
0.77459667 0.75261781 0.75261781 0.6761234 ]
|
|
|
|
mean value: 0.6048474443196609
|
|
|
|
key: train_mcc
|
|
value: [0.73864041 0.78978412 0.77120096 0.78869542 0.98148148 0.77013788
|
|
0.75392071 0.78262379 0.77898084 0.77013788]
|
|
|
|
mean value: 0.7925603488837074
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.79166667 0.83333333 0.75 0.58333333 0.66666667
|
|
0.875 0.875 0.875 0.83333333]
|
|
|
|
mean value: 0.7958333333333334
|
|
|
|
key: train_accuracy
|
|
value: [0.86574074 0.89351852 0.88425926 0.89351852 0.99074074 0.88425926
|
|
0.875 0.88888889 0.88888889 0.88425926]
|
|
|
|
mean value: 0.8949074074074074
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.76190476 0.85714286 0.78571429 0.58333333 0.66666667
|
|
0.85714286 0.86956522 0.88 0.84615385]
|
|
|
|
mean value: 0.7977189042841216
|
|
|
|
key: train_fscore
|
|
value: [0.87445887 0.89777778 0.88888889 0.89686099 0.99074074 0.88789238
|
|
0.88105727 0.89473684 0.89189189 0.88789238]
|
|
|
|
mean value: 0.8992198024496217
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.88888889 0.75 0.6875 0.58333333 0.66666667
|
|
1. 0.90909091 0.84615385 0.78571429]
|
|
|
|
mean value: 0.8026438838938839
|
|
|
|
key: train_precision
|
|
value: [0.82113821 0.86324786 0.85470085 0.86956522 0.99074074 0.86086957
|
|
0.84033613 0.85 0.86842105 0.86086957]
|
|
|
|
mean value: 0.867988920498302
|
|
|
|
key: test_recall
|
|
value: [0.83333333 0.66666667 1. 0.91666667 0.58333333 0.66666667
|
|
0.75 0.83333333 0.91666667 0.91666667]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_recall
|
|
value: [0.93518519 0.93518519 0.92592593 0.92592593 0.99074074 0.91666667
|
|
0.92592593 0.94444444 0.91666667 0.91666667]
|
|
|
|
mean value: 0.9333333333333333
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.79166667 0.83333333 0.75 0.58333333 0.66666667
|
|
0.875 0.875 0.875 0.83333333]
|
|
|
|
mean value: 0.7958333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.86574074 0.89351852 0.88425926 0.89351852 0.99074074 0.88425926
|
|
0.875 0.88888889 0.88888889 0.88425926]
|
|
|
|
mean value: 0.8949074074074074
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.61538462 0.75 0.64705882 0.41176471 0.5
|
|
0.75 0.76923077 0.78571429 0.73333333]
|
|
|
|
mean value: 0.6731717302305538
|
|
|
|
key: train_jcc
|
|
value: [0.77692308 0.81451613 0.8 0.81300813 0.98165138 0.7983871
|
|
0.78740157 0.80952381 0.80487805 0.7983871 ]
|
|
|
|
mean value: 0.8184676338839258
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.7
|