19325 lines
935 KiB
Text
19325 lines
935 KiB
Text
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_orig.py:550: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 424
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 424
|
|
ncols: 265
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 102
|
|
log10_or_mychisq 102
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 166
|
|
No. of categorical features: 7
|
|
|
|
index: 0
|
|
ind: 1
|
|
|
|
Mask count check: True
|
|
Original Data
|
|
Counter({1: 114, 0: 71}) Data dim: (185, 173)
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data: ORIGINAL training
|
|
actual values: training set
|
|
imputed values: blind test set
|
|
Train data size: (185, 173)
|
|
Test data size: (239, 173)
|
|
y_train numbers: Counter({1: 114, 0: 71})
|
|
y_train ratio: 0.6228070175438597
|
|
|
|
y_test_numbers: Counter({0: 120, 1: 119})
|
|
y_test ratio: 1.0084033613445378
|
|
-------------------------------------------------------------
|
|
Simple Random OverSampling
|
|
Counter({0: 114, 1: 114})
|
|
(228, 173)
|
|
Simple Random UnderSampling
|
|
Counter({0: 71, 1: 71})
|
|
(142, 173)
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 114, 1: 114})
|
|
(228, 173)
|
|
SMOTE_NC OverSampling
|
|
Counter({0: 114, 1: 114})
|
|
(228, 173)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis: ORIGINAL
|
|
Gene name: pncA
|
|
Drug name: pyrazinamide
|
|
|
|
Output directory: /home/tanu/git/Data/pyrazinamide/output/ml/tts_orig/
|
|
|
|
Sanity checks:
|
|
Total input features: 173
|
|
|
|
Training data size: (185, 173)
|
|
Test data size: (239, 173)
|
|
|
|
Target feature numbers (training data): Counter({1: 114, 0: 71})
|
|
Target features ratio (training data: 0.6228070175438597
|
|
|
|
Target feature numbers (test data): Counter({0: 120, 1: 119})
|
|
Target features ratio (test data): 1.0084033613445378
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 34
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
These are:
|
|
['ALTS910101', 'AZAE970101', 'AZAE970102', 'BASU010101', 'BENS940101', 'BENS940102', 'BENS940103', 'BENS940104', 'BETM990101', 'BLAJ010101', 'BONM030101', 'BONM030102', 'BONM030103', 'BONM030104', 'BONM030105', 'BONM030106', 'BRYS930101', 'CROG050101', 'CSEM940101', 'DAYM780301', 'DAYM780302', 'DOSZ010101', 'DOSZ010102', 'DOSZ010103', 'DOSZ010104', 'FEND850101', 'FITW660101', 'GEOD900101', 'GIAG010101', 'GONG920101', 'GRAR740104', 'HENS920101', 'HENS920102', 'HENS920103', 'HENS920104', 'JOHM930101', 'JOND920103', 'JOND940101', 'KANM000101', 'KAPO950101', 'KESO980101', 'KESO980102', 'KOLA920101', 'KOLA930101', 'KOSJ950100_RSA_SST', 'KOSJ950100_SST', 'KOSJ950110_RSA', 'KOSJ950115', 'LEVJ860101', 'LINK010101', 'LIWA970101', 'LUTR910101', 'LUTR910102', 'LUTR910103', 'LUTR910104', 'LUTR910105', 'LUTR910106', 'LUTR910107', 'LUTR910108', 'LUTR910109', 'MCLA710101', 'MCLA720101', 'MEHP950102', 'MICC010101', 'MIRL960101', 'MIYS850102', 'MIYS850103', 'MIYS930101', 'MIYS960101', 'MIYS960102', 'MIYS960103', 'MIYS990106', 'MIYS990107', 'MIYT790101', 'MOHR870101', 'MOOG990101', 'MUET010101', 'MUET020101', 'MUET020102', 'NAOD960101', 'NGPC000101', 'NIEK910101', 'NIEK910102', 'OGAK980101', 'OVEJ920100_RSA', 'OVEJ920101', 'OVEJ920102', 'OVEJ920103', 'PRLA000101', 'PRLA000102', 'QUIB020101', 'QU_C930101', 'QU_C930102', 'QU_C930103', 'RIER950101', 'RISJ880101', 'RUSR970101', 'RUSR970102', 'RUSR970103', 'SIMK990101', 'SIMK990102', 'SIMK990103', 'SIMK990104', 'SIMK990105', 'SKOJ000101', 'SKOJ000102', 'SKOJ970101', 'TANS760101', 'TANS760102', 'THOP960101', 'TOBD000101', 'TOBD000102', 'TUDE900101', 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106']
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03524065 0.031986 0.03195024 0.03289199 0.02481055 0.05734158
|
|
0.06366634 0.04798388 0.02917767 0.03274918]
|
|
|
|
mean value: 0.038779807090759275
|
|
|
|
key: score_time
|
|
value: [0.01227498 0.01202679 0.01314306 0.01324391 0.0123229 0.01349568
|
|
0.01344991 0.01196861 0.01190662 0.01218939]
|
|
|
|
mean value: 0.01260218620300293
|
|
|
|
key: test_mcc
|
|
value: [0.33796318 0.54761905 0.0952381 0.77380952 0.65477023 0.53246753
|
|
0.89188259 0.12182898 0.2548236 0.2987013 ]
|
|
|
|
mean value: 0.45091040737717836
|
|
|
|
key: train_mcc
|
|
value: [0.83287487 0.78705463 0.79925792 0.81149011 0.76271746 0.81037732
|
|
0.8120727 0.82431059 0.82431059 0.84779256]
|
|
|
|
mean value: 0.8112258763592134
|
|
|
|
key: test_accuracy
|
|
value: [0.68421053 0.78947368 0.57894737 0.89473684 0.84210526 0.77777778
|
|
0.94444444 0.61111111 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7456140350877193
|
|
|
|
key: train_accuracy
|
|
value: [0.92168675 0.89759036 0.90361446 0.90963855 0.88554217 0.91017964
|
|
0.91017964 0.91616766 0.91616766 0.92814371]
|
|
|
|
mean value: 0.909891061250992
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.83333333 0.66666667 0.91666667 0.88 0.81818182
|
|
0.95238095 0.72 0.76923077 0.72727273]
|
|
|
|
mean value: 0.8033732933732933
|
|
|
|
key: train_fscore
|
|
value: [0.93838863 0.92165899 0.92592593 0.93023256 0.91324201 0.93023256
|
|
0.93087558 0.93518519 0.93518519 0.94339623]
|
|
|
|
mean value: 0.9304322835927279
|
|
|
|
key: test_precision
|
|
value: [0.69230769 0.83333333 0.66666667 0.91666667 0.84615385 0.81818182
|
|
1. 0.64285714 0.66666667 0.72727273]
|
|
|
|
mean value: 0.781010656010656
|
|
|
|
key: train_precision
|
|
value: [0.91666667 0.86956522 0.87719298 0.88495575 0.85470085 0.89285714
|
|
0.88596491 0.89380531 0.89380531 0.91743119]
|
|
|
|
mean value: 0.8886945340694777
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.83333333 0.66666667 0.91666667 0.91666667 0.81818182
|
|
0.90909091 0.81818182 0.90909091 0.72727273]
|
|
|
|
mean value: 0.8333333333333334
|
|
|
|
key: train_recall
|
|
value: [0.96116505 0.98039216 0.98039216 0.98039216 0.98039216 0.97087379
|
|
0.98058252 0.98058252 0.98058252 0.97087379]
|
|
|
|
mean value: 0.9766228821625738
|
|
|
|
key: test_roc_auc
|
|
value: [0.65909091 0.77380952 0.54761905 0.88690476 0.81547619 0.76623377
|
|
0.95454545 0.55194805 0.5974026 0.64935065]
|
|
|
|
mean value: 0.7202380952380952
|
|
|
|
key: train_roc_auc
|
|
value: [0.90915395 0.87300858 0.88082108 0.88863358 0.85738358 0.89168689
|
|
0.88872876 0.89654126 0.89654126 0.91512439]
|
|
|
|
mean value: 0.8897623339384297
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.71428571 0.5 0.84615385 0.78571429 0.69230769
|
|
0.90909091 0.5625 0.625 0.57142857]
|
|
|
|
mean value: 0.6806481018981019
|
|
|
|
key: train_jcc
|
|
value: [0.88392857 0.85470085 0.86206897 0.86956522 0.84033613 0.86956522
|
|
0.87068966 0.87826087 0.87826087 0.89285714]
|
|
|
|
mean value: 0.870023349804305
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.93467402 1.02162886 0.67857623 0.93024802 0.72668695 0.85950851
|
|
0.93305063 0.77515554 0.76321864 1.07244253]
|
|
|
|
mean value: 0.8695189952850342
|
|
|
|
key: score_time
|
|
value: [0.01315522 0.01336765 0.01328516 0.01339579 0.0135901 0.01321673
|
|
0.01317739 0.01686883 0.01603985 0.01210642]
|
|
|
|
mean value: 0.013820314407348632
|
|
|
|
key: test_mcc
|
|
value: [0.60553007 0.45361105 0.67460105 0.80507649 0.77380952 0.66254135
|
|
0.56407607 0.64465837 0.44320263 0.2987013 ]
|
|
|
|
mean value: 0.5925807909458823
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 0.98737524
|
|
1. 0.98737524 1. 0.91120799]
|
|
|
|
mean value: 0.9885958461530414
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.73684211 0.84210526 0.89473684 0.89473684 0.83333333
|
|
0.72222222 0.83333333 0.72222222 0.66666667]
|
|
|
|
mean value: 0.7935672514619883
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 0.99401198
|
|
1. 0.99401198 1. 0.95808383]
|
|
|
|
mean value: 0.9946107784431137
|
|
|
|
key: test_fscore
|
|
value: [0.84615385 0.7826087 0.86956522 0.90909091 0.91666667 0.85714286
|
|
0.70588235 0.86956522 0.81481481 0.72727273]
|
|
|
|
mean value: 0.829876330451778
|
|
|
|
key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
train_fscore
|
|
value: [1. 1. 1. 1. 1. 0.99516908
|
|
1. 0.99516908 1. 0.96650718]
|
|
|
|
mean value: 0.99568453412847
|
|
|
|
key: test_precision
|
|
value: [0.73333333 0.81818182 0.90909091 1. 0.91666667 0.9
|
|
1. 0.83333333 0.6875 0.72727273]
|
|
|
|
mean value: 0.8525378787878788
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 0.99038462
|
|
1. 0.99038462 1. 0.95283019]
|
|
|
|
mean value: 0.9933599419448476
|
|
|
|
key: test_recall
|
|
value: [1. 0.75 0.83333333 0.83333333 0.91666667 0.81818182
|
|
0.54545455 0.90909091 1. 0.72727273]
|
|
|
|
mean value: 0.8333333333333334
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.98058252]
|
|
|
|
mean value: 0.9980582524271845
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.73214286 0.8452381 0.91666667 0.88690476 0.83766234
|
|
0.77272727 0.81168831 0.64285714 0.64935065]
|
|
|
|
mean value: 0.7845238095238095
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 0.9921875
|
|
1. 0.9921875 1. 0.95122876]
|
|
|
|
mean value: 0.9935603762135923
|
|
|
|
key: test_jcc
|
|
value: [0.73333333 0.64285714 0.76923077 0.83333333 0.84615385 0.75
|
|
0.54545455 0.76923077 0.6875 0.57142857]
|
|
|
|
mean value: 0.7148522311022311
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 0.99038462
|
|
1. 0.99038462 1. 0.93518519]
|
|
|
|
mean value: 0.9915954415954416
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01257062 0.01140189 0.00913525 0.00892401 0.00878763 0.00891304
|
|
0.00894451 0.00909281 0.00889635 0.00975704]
|
|
|
|
mean value: 0.009642314910888673
|
|
|
|
key: score_time
|
|
value: [0.01189208 0.00911927 0.00909996 0.00931287 0.00870299 0.0086081
|
|
0.00883341 0.00891304 0.00927591 0.00955582]
|
|
|
|
mean value: 0.009331345558166504
|
|
|
|
key: test_mcc
|
|
value: [0.34405118 0.26772484 0.03912304 0.40849122 0.14085904 0.26856633
|
|
0.2987013 0.06493506 0.56061191 0.40291148]
|
|
|
|
mean value: 0.2795975400517249
|
|
|
|
key: train_mcc
|
|
value: [0.57098929 0.35088235 0.40877514 0.55947749 0.40877514 0.55309666
|
|
0.46678391 0.53583369 0.49453247 0.45408591]
|
|
|
|
mean value: 0.4803232036222982
|
|
|
|
key: test_accuracy
|
|
value: [0.68421053 0.68421053 0.57894737 0.73684211 0.63157895 0.66666667
|
|
0.66666667 0.55555556 0.77777778 0.72222222]
|
|
|
|
mean value: 0.67046783625731
|
|
|
|
key: train_accuracy
|
|
value: [0.80120482 0.70481928 0.72891566 0.79518072 0.72891566 0.79041916
|
|
0.75449102 0.78443114 0.76646707 0.74850299]
|
|
|
|
mean value: 0.7603347521823822
|
|
|
|
key: test_fscore
|
|
value: [0.76923077 0.78571429 0.69230769 0.81481481 0.74074074 0.75
|
|
0.72727273 0.63636364 0.84615385 0.7826087 ]
|
|
|
|
mean value: 0.7545207208250686
|
|
|
|
key: train_fscore
|
|
value: [0.84507042 0.79324895 0.79638009 0.84545455 0.79638009 0.83253589
|
|
0.81278539 0.8317757 0.8202765 0.80733945]
|
|
|
|
mean value: 0.8181247015599945
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.6875 0.64285714 0.73333333 0.66666667 0.69230769
|
|
0.72727273 0.63636364 0.73333333 0.75 ]
|
|
|
|
mean value: 0.6936301198801199
|
|
|
|
key: train_precision
|
|
value: [0.81818182 0.6962963 0.7394958 0.78813559 0.7394958 0.82075472
|
|
0.76724138 0.8018018 0.78070175 0.76521739]
|
|
|
|
mean value: 0.7717322348120701
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.91666667 0.75 0.91666667 0.83333333 0.81818182
|
|
0.72727273 0.63636364 1. 0.81818182]
|
|
|
|
mean value: 0.8325757575757575
|
|
|
|
key: train_recall
|
|
value: [0.87378641 0.92156863 0.8627451 0.91176471 0.8627451 0.84466019
|
|
0.86407767 0.86407767 0.86407767 0.85436893]
|
|
|
|
mean value: 0.8723872073101084
|
|
|
|
key: test_roc_auc
|
|
value: [0.64204545 0.60119048 0.51785714 0.67261905 0.55952381 0.62337662
|
|
0.64935065 0.53246753 0.71428571 0.69480519]
|
|
|
|
mean value: 0.6207521645021645
|
|
|
|
key: train_roc_auc
|
|
value: [0.77816305 0.64047181 0.68918505 0.76056985 0.68918505 0.7738926
|
|
0.72110133 0.76016383 0.73672633 0.71624697]
|
|
|
|
mean value: 0.7265705877820383
|
|
|
|
key: test_jcc
|
|
value: [0.625 0.64705882 0.52941176 0.6875 0.58823529 0.6
|
|
0.57142857 0.46666667 0.73333333 0.64285714]
|
|
|
|
mean value: 0.6091491596638655
|
|
|
|
key: train_jcc
|
|
value: [0.73170732 0.65734266 0.66165414 0.73228346 0.66165414 0.71311475
|
|
0.68461538 0.712 0.6953125 0.67692308]
|
|
|
|
mean value: 0.6926607425296271
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01034927 0.00944018 0.00949478 0.00973845 0.00962996 0.00887012
|
|
0.0089128 0.00977898 0.00905228 0.00937176]
|
|
|
|
mean value: 0.009463858604431153
|
|
|
|
key: score_time
|
|
value: [0.00965571 0.00940299 0.00865912 0.00867152 0.009197 0.00870323
|
|
0.00895262 0.0092957 0.00875807 0.00866055]
|
|
|
|
mean value: 0.008995652198791504
|
|
|
|
key: test_mcc
|
|
value: [ 0.23262105 0.23262105 -0.01163105 0.28690229 0.32142857 0.34188173
|
|
-0.02548236 -0.32232919 -0.16883117 0.43320011]
|
|
|
|
mean value: 0.1320381044112035
|
|
|
|
key: train_mcc
|
|
value: [0.38992541 0.37624725 0.38970588 0.37720787 0.42954422 0.36848818
|
|
0.4353138 0.48789999 0.33479889 0.37453283]
|
|
|
|
mean value: 0.39636643214511924
|
|
|
|
key: test_accuracy
|
|
value: [0.63157895 0.63157895 0.47368421 0.68421053 0.68421053 0.66666667
|
|
0.5 0.38888889 0.44444444 0.72222222]
|
|
|
|
mean value: 0.5827485380116959
|
|
|
|
key: train_accuracy
|
|
value: [0.71084337 0.69879518 0.71084337 0.71084337 0.72891566 0.69461078
|
|
0.73053892 0.76047904 0.68263473 0.7005988 ]
|
|
|
|
mean value: 0.7129103239304524
|
|
|
|
key: test_fscore
|
|
value: [0.69565217 0.69565217 0.5 0.76923077 0.75 0.7
|
|
0.57142857 0.52173913 0.54545455 0.76190476]
|
|
|
|
mean value: 0.6511062126279518
|
|
|
|
key: train_fscore
|
|
value: [0.76470588 0.74747475 0.76470588 0.77358491 0.77832512 0.74371859
|
|
0.77832512 0.80952381 0.73891626 0.75247525]
|
|
|
|
mean value: 0.7651755570317448
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.72727273 0.625 0.71428571 0.75 0.77777778
|
|
0.6 0.5 0.54545455 0.8 ]
|
|
|
|
mean value: 0.6706457431457431
|
|
|
|
key: train_precision
|
|
value: [0.77227723 0.77083333 0.76470588 0.74545455 0.78217822 0.77083333
|
|
0.79 0.79439252 0.75 0.76767677]
|
|
|
|
mean value: 0.7708351831059962
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.66666667 0.41666667 0.83333333 0.75 0.63636364
|
|
0.54545455 0.54545455 0.54545455 0.72727273]
|
|
|
|
mean value: 0.6393939393939394
|
|
|
|
key: train_recall
|
|
value: [0.75728155 0.7254902 0.76470588 0.80392157 0.7745098 0.7184466
|
|
0.76699029 0.82524272 0.72815534 0.73786408]
|
|
|
|
mean value: 0.7602608033504664
|
|
|
|
key: test_roc_auc
|
|
value: [0.61363636 0.61904762 0.49404762 0.63095238 0.66071429 0.67532468
|
|
0.48701299 0.34415584 0.41558442 0.72077922]
|
|
|
|
mean value: 0.5661255411255411
|
|
|
|
key: train_roc_auc
|
|
value: [0.69610109 0.6908701 0.69485294 0.68321078 0.7153799 0.6873483
|
|
0.71943265 0.74074636 0.66876517 0.68924454]
|
|
|
|
mean value: 0.6985951834212649
|
|
|
|
key: test_jcc
|
|
value: [0.53333333 0.53333333 0.33333333 0.625 0.6 0.53846154
|
|
0.4 0.35294118 0.375 0.61538462]
|
|
|
|
mean value: 0.4906787330316742
|
|
|
|
key: train_jcc
|
|
value: [0.61904762 0.59677419 0.61904762 0.63076923 0.63709677 0.592
|
|
0.63709677 0.68 0.5859375 0.6031746 ]
|
|
|
|
mean value: 0.6200944313974556
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01021385 0.0117085 0.00883484 0.00892735 0.0085423 0.00943303
|
|
0.00997519 0.00961924 0.00955629 0.00957227]
|
|
|
|
mean value: 0.009638285636901856
|
|
|
|
key: score_time
|
|
value: [0.05058432 0.03134537 0.01030421 0.00968504 0.01054215 0.01008773
|
|
0.01054263 0.01056457 0.01067567 0.01038671]
|
|
|
|
mean value: 0.01647183895111084
|
|
|
|
key: test_mcc
|
|
value: [-0.07954545 -0.26196842 -0.28414557 -0.33071891 -0.20865621 -0.16883117
|
|
-0.05096472 -0.0805823 -0.42640143 0.26856633]
|
|
|
|
mean value: -0.1623247858043403
|
|
|
|
key: train_mcc
|
|
value: [0.40149161 0.42213076 0.37917381 0.40791958 0.42567075 0.39903847
|
|
0.35572255 0.39451676 0.39528332 0.41049956]
|
|
|
|
mean value: 0.399144718616529
|
|
|
|
key: test_accuracy
|
|
value: [0.47368421 0.52631579 0.42105263 0.47368421 0.47368421 0.44444444
|
|
0.55555556 0.5 0.38888889 0.66666667]
|
|
|
|
mean value: 0.49239766081871345
|
|
|
|
key: train_accuracy
|
|
value: [0.72891566 0.73493976 0.71686747 0.72891566 0.73493976 0.7245509
|
|
0.70658683 0.7245509 0.7245509 0.73053892]
|
|
|
|
mean value: 0.7255356756366784
|
|
|
|
key: test_fscore
|
|
value: [0.54545455 0.68965517 0.56 0.64285714 0.61538462 0.54545455
|
|
0.69230769 0.60869565 0.56 0.75 ]
|
|
|
|
mean value: 0.6209809366046247
|
|
|
|
key: train_fscore
|
|
value: [0.80176211 0.8018018 0.79111111 0.79820628 0.7962963 0.79090909
|
|
0.78026906 0.8 0.79646018 0.79820628]
|
|
|
|
mean value: 0.7955022205996671
|
|
|
|
key: test_precision
|
|
value: [0.54545455 0.58823529 0.53846154 0.5625 0.57142857 0.54545455
|
|
0.6 0.58333333 0.5 0.69230769]
|
|
|
|
mean value: 0.5727175520557873
|
|
|
|
key: train_precision
|
|
value: [0.73387097 0.74166667 0.72357724 0.73553719 0.75438596 0.74358974
|
|
0.725 0.72440945 0.73170732 0.74166667]
|
|
|
|
mean value: 0.7355411201324364
|
|
|
|
key: test_recall
|
|
value: [0.54545455 0.83333333 0.58333333 0.75 0.66666667 0.54545455
|
|
0.81818182 0.63636364 0.63636364 0.81818182]
|
|
|
|
mean value: 0.6833333333333333
|
|
|
|
key: train_recall
|
|
value: [0.88349515 0.87254902 0.87254902 0.87254902 0.84313725 0.84466019
|
|
0.84466019 0.89320388 0.87378641 0.86407767]
|
|
|
|
mean value: 0.8664667808871122
|
|
|
|
key: test_roc_auc
|
|
value: [0.46022727 0.41666667 0.36309524 0.375 0.4047619 0.41558442
|
|
0.48051948 0.46103896 0.31818182 0.62337662]
|
|
|
|
mean value: 0.4318452380952381
|
|
|
|
key: train_roc_auc
|
|
value: [0.67984281 0.69408701 0.67064951 0.68627451 0.70281863 0.6879551
|
|
0.6645176 0.67316444 0.6790807 0.68985133]
|
|
|
|
mean value: 0.6828241642530799
|
|
|
|
key: test_jcc
|
|
value: [0.375 0.52631579 0.38888889 0.47368421 0.44444444 0.375
|
|
0.52941176 0.4375 0.38888889 0.6 ]
|
|
|
|
mean value: 0.45391339869281044
|
|
|
|
key: train_jcc
|
|
value: [0.66911765 0.66917293 0.65441176 0.6641791 0.66153846 0.65413534
|
|
0.63970588 0.66666667 0.66176471 0.6641791 ]
|
|
|
|
mean value: 0.6604871607837044
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0124259 0.0118289 0.01121783 0.01115513 0.01249623 0.01109719
|
|
0.01236582 0.01226473 0.01196218 0.01111054]
|
|
|
|
mean value: 0.011792445182800293
|
|
|
|
key: score_time
|
|
value: [0.01009512 0.00946808 0.00996041 0.0101192 0.00939679 0.01001549
|
|
0.00991511 0.0099535 0.00997281 0.00915146]
|
|
|
|
mean value: 0.009804797172546387
|
|
|
|
key: test_mcc
|
|
value: [ 0.40219983 0.26772484 0.3086067 0.3086067 0.3086067 0.39594419
|
|
0.39594419 -0.05096472 0.3040345 0.39594419]
|
|
|
|
mean value: 0.303664710755213
|
|
|
|
key: train_mcc
|
|
value: [0.5635375 0.54404241 0.59782919 0.56865593 0.53158234 0.54476067
|
|
0.53640723 0.58634752 0.54476067 0.59862298]
|
|
|
|
mean value: 0.5616546427256583
|
|
|
|
key: test_accuracy
|
|
value: [0.68421053 0.68421053 0.68421053 0.68421053 0.68421053 0.72222222
|
|
0.72222222 0.55555556 0.66666667 0.72222222]
|
|
|
|
mean value: 0.6809941520467836
|
|
|
|
key: train_accuracy
|
|
value: [0.78313253 0.77108434 0.80120482 0.78313253 0.76506024 0.77245509
|
|
0.77245509 0.79640719 0.77245509 0.80239521]
|
|
|
|
mean value: 0.7819782122501984
|
|
|
|
key: test_fscore
|
|
value: [0.78571429 0.78571429 0.8 0.8 0.8 0.8
|
|
0.8 0.69230769 0.78571429 0.8 ]
|
|
|
|
mean value: 0.784945054945055
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[0.85123967 0.84297521 0.85957447 0.85 0.83950617 0.8442623
|
|
0.84297521 0.85714286 0.8442623 0.86075949]
|
|
|
|
mean value: 0.8492697664546919
|
|
|
|
key: test_precision
|
|
value: [0.64705882 0.6875 0.66666667 0.66666667 0.66666667 0.71428571
|
|
0.71428571 0.6 0.64705882 0.71428571]
|
|
|
|
mean value: 0.6724474789915966
|
|
|
|
key: train_precision
|
|
value: [0.74100719 0.72857143 0.7593985 0.73913043 0.72340426 0.73049645
|
|
0.73381295 0.75555556 0.73049645 0.76119403]
|
|
|
|
mean value: 0.74030672520064
|
|
|
|
key: test_recall
|
|
value: [1. 0.91666667 1. 1. 1. 0.90909091
|
|
0.90909091 0.81818182 1. 0.90909091]
|
|
|
|
mean value: 0.9462121212121212
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.99019608 1. 1. 1.
|
|
0.99029126 0.99029126 1. 0.99029126]
|
|
|
|
mean value: 0.9961069864839139
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.60119048 0.57142857 0.57142857 0.57142857 0.66883117
|
|
0.66883117 0.48051948 0.57142857 0.66883117]
|
|
|
|
mean value: 0.5998917748917749
|
|
|
|
key: train_roc_auc
|
|
value: [0.71428571 0.703125 0.74509804 0.71875 0.6953125 0.703125
|
|
0.70608313 0.73733313 0.703125 0.74514563]
|
|
|
|
mean value: 0.7171383146705284
|
|
|
|
key: test_jcc
|
|
value: [0.64705882 0.64705882 0.66666667 0.66666667 0.66666667 0.66666667
|
|
0.66666667 0.52941176 0.64705882 0.66666667]
|
|
|
|
mean value: 0.6470588235294118
|
|
|
|
key: train_jcc
|
|
value: [0.74100719 0.72857143 0.75373134 0.73913043 0.72340426 0.73049645
|
|
0.72857143 0.75 0.73049645 0.75555556]
|
|
|
|
mean value: 0.7380964548129775
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.14798498 0.80445504 0.68168712 0.778126 0.67166209 0.71176672
|
|
0.82175732 0.63936996 0.6600368 0.79345798]
|
|
|
|
mean value: 0.7710304021835327
|
|
|
|
key: score_time
|
|
value: [0.01561236 0.01476002 0.01496482 0.01485348 0.01530099 0.01830029
|
|
0.01525712 0.01215148 0.0121603 0.02092552]
|
|
|
|
mean value: 0.015428638458251953
|
|
|
|
key: test_mcc
|
|
value: [ 0.33796318 0.18531233 0.32142857 0.32142857 0.45361105 0.64465837
|
|
0.79772404 -0.0805823 0.01413507 0.40291148]
|
|
|
|
mean value: 0.33985903712040866
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.68421053 0.63157895 0.68421053 0.68421053 0.73684211 0.83333333
|
|
0.88888889 0.5 0.55555556 0.72222222]
|
|
|
|
mean value: 0.6921052631578948
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.72 0.75 0.75 0.7826087 0.86956522
|
|
0.9 0.60869565 0.66666667 0.7826087 ]
|
|
|
|
mean value: 0.7580144927536232
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.69230769 0.69230769 0.75 0.75 0.81818182 0.83333333
|
|
1. 0.58333333 0.61538462 0.75 ]
|
|
|
|
mean value: 0.7484848484848485
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.75 0.75 0.75 0.75 0.90909091
|
|
0.81818182 0.63636364 0.72727273 0.81818182]
|
|
|
|
mean value: 0.7727272727272727
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.65909091 0.58928571 0.66071429 0.66071429 0.73214286 0.81168831
|
|
0.90909091 0.46103896 0.50649351 0.69480519]
|
|
|
|
mean value: 0.6685064935064935
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.5625 0.6 0.6 0.64285714 0.76923077
|
|
0.81818182 0.4375 0.5 0.64285714]
|
|
|
|
mean value: 0.6173126873126873
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0201478 0.01317859 0.01518607 0.01245022 0.01219487 0.01223302
|
|
0.01228309 0.01388884 0.01354384 0.01307154]
|
|
|
|
mean value: 0.013817787170410156
|
|
|
|
key: score_time
|
|
value: [0.01178908 0.00907445 0.00897789 0.00866628 0.00874162 0.00875974
|
|
0.00865197 0.008744 0.00954509 0.00889468]
|
|
|
|
mean value: 0.009184479713439941
|
|
|
|
key: test_mcc
|
|
value: [0.45361105 1. 0.65133895 0.80507649 0.54761905 0.66254135
|
|
0.79772404 0.26856633 0.88640526 0.53246753]
|
|
|
|
mean value: 0.6605350037589758
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.73684211 1. 0.78947368 0.89473684 0.78947368 0.83333333
|
|
0.88888889 0.66666667 0.94444444 0.77777778]
|
|
|
|
mean value: 0.8321637426900584
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 1. 0.8 0.90909091 0.83333333 0.85714286
|
|
0.9 0.75 0.95652174 0.81818182]
|
|
|
|
mean value: 0.8606879352531527
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 1. 1. 1. 0.83333333 0.9
|
|
1. 0.69230769 0.91666667 0.81818182]
|
|
|
|
mean value: 0.8910489510489511
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.81818182 1. 0.66666667 0.83333333 0.83333333 0.81818182
|
|
0.81818182 0.81818182 1. 0.81818182]
|
|
|
|
mean value: 0.8424242424242424
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.72159091 1. 0.83333333 0.91666667 0.77380952 0.83766234
|
|
0.90909091 0.62337662 0.92857143 0.76623377]
|
|
|
|
mean value: 0.8310335497835498
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 1. 0.66666667 0.83333333 0.71428571 0.75
|
|
0.81818182 0.6 0.91666667 0.69230769]
|
|
|
|
mean value: 0.7634299034299035
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09460139 0.09494543 0.09293103 0.09291887 0.09456015 0.09315991
|
|
0.09335065 0.09217739 0.09356213 0.10142875]
|
|
|
|
mean value: 0.09436357021331787
|
|
|
|
key: score_time
|
|
value: [0.01741433 0.01736999 0.01729822 0.01721883 0.01758862 0.01811028
|
|
0.01707053 0.01714253 0.01738429 0.01856899]
|
|
|
|
mean value: 0.017516660690307616
|
|
|
|
key: test_mcc
|
|
value: [0.08257228 0.53468154 0.1495142 0.42004128 0.32142857 0.39594419
|
|
0.76623377 0.39594419 0.2548236 0.20385888]
|
|
|
|
mean value: 0.35250424955725035
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.57894737 0.78947368 0.57894737 0.73684211 0.68421053 0.72222222
|
|
0.88888889 0.72222222 0.66666667 0.61111111]
|
|
|
|
mean value: 0.697953216374269
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.69230769 0.84615385 0.63636364 0.8 0.75 0.8
|
|
0.90909091 0.8 0.76923077 0.66666667]
|
|
|
|
mean value: 0.766981351981352
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.6 0.78571429 0.7 0.76923077 0.75 0.71428571
|
|
0.90909091 0.71428571 0.66666667 0.7 ]
|
|
|
|
mean value: 0.7309274059274059
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.91666667 0.58333333 0.83333333 0.75 0.90909091
|
|
0.90909091 0.90909091 0.90909091 0.63636364]
|
|
|
|
mean value: 0.8174242424242424
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.53409091 0.74404762 0.57738095 0.70238095 0.66071429 0.66883117
|
|
0.88311688 0.66883117 0.5974026 0.6038961 ]
|
|
|
|
mean value: 0.664069264069264
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.52941176 0.73333333 0.46666667 0.66666667 0.6 0.66666667
|
|
0.83333333 0.66666667 0.625 0.5 ]
|
|
|
|
mean value: 0.6287745098039216
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01018333 0.0099957 0.00997639 0.00996017 0.01004481 0.00998259
|
|
0.00940919 0.00883722 0.00895667 0.00889516]
|
|
|
|
mean value: 0.009624123573303223
|
|
|
|
key: score_time
|
|
value: [0.00936508 0.00946832 0.00939035 0.00943542 0.00943351 0.00934362
|
|
0.00854778 0.00865889 0.0086751 0.00854993]
|
|
|
|
mean value: 0.009086799621582032
|
|
|
|
key: test_mcc
|
|
value: [ 0.25844328 0.0952381 0.13095238 0.1495142 0.32142857 0.43320011
|
|
0.48416483 -0.0805823 0.64465837 0.40291148]
|
|
|
|
mean value: 0.2839929034172316
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.63157895 0.57894737 0.52631579 0.57894737 0.68421053 0.72222222
|
|
0.72222222 0.5 0.83333333 0.72222222]
|
|
|
|
mean value: 0.65
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.66666667 0.52631579 0.63636364 0.75 0.76190476
|
|
0.73684211 0.60869565 0.86956522 0.7826087 ]
|
|
|
|
mean value: 0.7005629191555965
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.7 0.66666667 0.71428571 0.7 0.75 0.8
|
|
0.875 0.58333333 0.83333333 0.75 ]
|
|
|
|
mean value: 0.7372619047619048
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.66666667 0.41666667 0.58333333 0.75 0.72727273
|
|
0.63636364 0.63636364 0.90909091 0.81818182]
|
|
|
|
mean value: 0.678030303030303
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.63068182 0.54761905 0.56547619 0.57738095 0.66071429 0.72077922
|
|
0.74675325 0.46103896 0.81168831 0.69480519]
|
|
|
|
mean value: 0.641693722943723
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.5 0.35714286 0.46666667 0.6 0.61538462
|
|
0.58333333 0.4375 0.76923077 0.64285714]
|
|
|
|
mean value: 0.5472115384615385
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.21576595 1.23462939 1.23632216 1.24061584 1.27298141 1.25783706
|
|
1.2260108 1.21766496 1.2200973 1.23109865]
|
|
|
|
mean value: 1.2353023529052733
|
|
|
|
key: score_time
|
|
value: [0.08846188 0.09100533 0.09566069 0.15491486 0.09412932 0.08944058
|
|
0.08805823 0.09122467 0.09319806 0.094805 ]
|
|
|
|
mean value: 0.09808986186981201
|
|
|
|
key: test_mcc
|
|
value: [0.60553007 0.88949918 0.67460105 0.65477023 0.56694671 0.64465837
|
|
0.89188259 0.39594419 0.77742884 0.52299758]
|
|
|
|
mean value: 0.6624258812978574
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.94736842 0.84210526 0.84210526 0.78947368 0.83333333
|
|
0.94444444 0.72222222 0.88888889 0.77777778]
|
|
|
|
mean value: 0.837719298245614
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.84615385 0.96 0.86956522 0.88 0.85714286 0.86956522
|
|
0.95238095 0.8 0.91666667 0.83333333]
|
|
|
|
mean value: 0.8784808090460264
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.73333333 0.92307692 0.90909091 0.84615385 0.75 0.83333333
|
|
1. 0.71428571 0.84615385 0.76923077]
|
|
|
|
mean value: 0.8324658674658675
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.83333333 0.91666667 1. 0.90909091
|
|
0.90909091 0.90909091 1. 0.90909091]
|
|
|
|
mean value: 0.9386363636363636
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.92857143 0.8452381 0.81547619 0.71428571 0.81168831
|
|
0.95454545 0.66883117 0.85714286 0.74025974]
|
|
|
|
mean value: 0.8086038961038962
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.73333333 0.92307692 0.76923077 0.78571429 0.75 0.76923077
|
|
0.90909091 0.66666667 0.84615385 0.71428571]
|
|
|
|
mean value: 0.7866783216783216
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
key: fit_time
|
|
value: [1.74438 0.86402607 0.87671947 0.88783979 0.97754407 0.87419868
|
|
0.87052846 0.87867832 0.88394403 0.91006446]
|
|
|
|
mean value: 0.9767923355102539
|
|
|
|
key: score_time
|
|
value: [0.22032857 0.17637062 0.18659782 0.2488842 0.18379068 0.22443295
|
|
0.2052598 0.24423671 0.17268133 0.18593717]
|
|
|
|
mean value: 0.20485198497772217
|
|
|
|
key: test_mcc
|
|
value: [0.60553007 0.65477023 0.53468154 0.88949918 0.53468154 0.39594419
|
|
0.76623377 0.2548236 0.67005939 0.67005939]
|
|
|
|
mean value: 0.5976282906983714
|
|
|
|
key: train_mcc
|
|
value: [0.89849587 0.88685769 0.87457979 0.87457979 0.88685769 0.88899836
|
|
0.8872319 0.91188694 0.86279135 0.89953068]
|
|
|
|
mean value: 0.8871810060846004
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.84210526 0.78947368 0.94736842 0.78947368 0.72222222
|
|
0.88888889 0.66666667 0.83333333 0.83333333]
|
|
|
|
mean value: 0.810233918128655
|
|
|
|
key: train_accuracy
|
|
value: [0.95180723 0.94578313 0.93975904 0.93975904 0.94578313 0.94610778
|
|
0.94610778 0.95808383 0.93413174 0.95209581]
|
|
|
|
mean value: 0.9459418512372845
|
|
|
|
key: test_fscore
|
|
value: [0.84615385 0.88 0.84615385 0.96 0.84615385 0.8
|
|
0.90909091 0.76923077 0.88 0.88 ]
|
|
|
|
mean value: 0.8616783216783217
|
|
|
|
key: train_fscore
|
|
value: [0.96226415 0.95734597 0.95283019 0.95283019 0.95734597 0.95813953
|
|
0.95774648 0.96682464 0.94883721 0.96226415]
|
|
|
|
mean value: 0.9576428489982294
|
|
|
|
key: test_precision
|
|
value: [0.73333333 0.84615385 0.78571429 0.92307692 0.78571429 0.71428571
|
|
0.90909091 0.66666667 0.78571429 0.78571429]
|
|
|
|
mean value: 0.7935464535464536
|
|
|
|
key: train_precision
|
|
value: [0.93577982 0.9266055 0.91818182 0.91818182 0.9266055 0.91964286
|
|
0.92727273 0.94444444 0.91071429 0.93577982]
|
|
|
|
mean value: 0.9263208593139786
|
|
|
|
key: test_recall
|
|
value: [1. 0.91666667 0.91666667 1. 0.91666667 0.90909091
|
|
0.90909091 0.90909091 1. 1. ]
|
|
|
|
mean value: 0.9477272727272728
|
|
|
|
key: train_recall
|
|
value: [0.99029126 0.99019608 0.99019608 0.99019608 0.99019608 1.
|
|
0.99029126 0.99029126 0.99029126 0.99029126]
|
|
|
|
mean value: 0.9912240624405102
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.81547619 0.74404762 0.92857143 0.74404762 0.66883117
|
|
0.88311688 0.5974026 0.78571429 0.78571429]
|
|
|
|
mean value: 0.7702922077922078
|
|
|
|
key: train_roc_auc
|
|
value: [0.93959008 0.93259804 0.92478554 0.92478554 0.93259804 0.9296875
|
|
0.93264563 0.94827063 0.91702063 0.94045813]
|
|
|
|
mean value: 0.9322439756646995
|
|
|
|
key: test_jcc
|
|
value: [0.73333333 0.78571429 0.73333333 0.92307692 0.73333333 0.66666667
|
|
0.83333333 0.625 0.78571429 0.78571429]
|
|
|
|
mean value: 0.760521978021978
|
|
|
|
key: train_jcc
|
|
value: [0.92727273 0.91818182 0.90990991 0.90990991 0.91818182 0.91964286
|
|
0.91891892 0.93577982 0.90265487 0.92727273]
|
|
|
|
mean value: 0.9187725370561085
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01017141 0.01006699 0.01010513 0.01003337 0.01007533 0.0101161
|
|
0.00903153 0.00938582 0.00902843 0.00992584]
|
|
|
|
mean value: 0.009793996810913086
|
|
|
|
key: score_time
|
|
value: [0.00961804 0.00945258 0.00932074 0.00960994 0.0095284 0.00941348
|
|
0.00897074 0.0086844 0.00946951 0.00944734]
|
|
|
|
mean value: 0.009351515769958496
|
|
|
|
key: test_mcc
|
|
value: [ 0.23262105 0.23262105 -0.01163105 0.28690229 0.32142857 0.34188173
|
|
-0.02548236 -0.32232919 -0.16883117 0.43320011]
|
|
|
|
mean value: 0.1320381044112035
|
|
|
|
key: train_mcc
|
|
value: [0.38992541 0.37624725 0.38970588 0.37720787 0.42954422 0.36848818
|
|
0.4353138 0.48789999 0.33479889 0.37453283]
|
|
|
|
mean value: 0.39636643214511924
|
|
|
|
key: test_accuracy
|
|
value: [0.63157895 0.63157895 0.47368421 0.68421053 0.68421053 0.66666667
|
|
0.5 0.38888889 0.44444444 0.72222222]
|
|
|
|
mean value: 0.5827485380116959
|
|
|
|
key: train_accuracy
|
|
value: [0.71084337 0.69879518 0.71084337 0.71084337 0.72891566 0.69461078
|
|
0.73053892 0.76047904 0.68263473 0.7005988 ]
|
|
|
|
mean value: 0.7129103239304524
|
|
|
|
key: test_fscore
|
|
value: [0.69565217 0.69565217 0.5 0.76923077 0.75 0.7
|
|
0.57142857 0.52173913 0.54545455 0.76190476]
|
|
|
|
mean value: 0.6511062126279518
|
|
|
|
key: train_fscore
|
|
value: [0.76470588 0.74747475 0.76470588 0.77358491 0.77832512 0.74371859
|
|
0.77832512 0.80952381 0.73891626 0.75247525]
|
|
|
|
mean value: 0.7651755570317448
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.72727273 0.625 0.71428571 0.75 0.77777778
|
|
0.6 0.5 0.54545455 0.8 ]
|
|
|
|
mean value: 0.6706457431457431
|
|
|
|
key: train_precision
|
|
value: [0.77227723 0.77083333 0.76470588 0.74545455 0.78217822 0.77083333
|
|
0.79 0.79439252 0.75 0.76767677]
|
|
|
|
mean value: 0.7708351831059962
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.66666667 0.41666667 0.83333333 0.75 0.63636364
|
|
0.54545455 0.54545455 0.54545455 0.72727273]
|
|
|
|
mean value: 0.6393939393939394
|
|
|
|
key: train_recall
|
|
value: [0.75728155 0.7254902 0.76470588 0.80392157 0.7745098 0.7184466
|
|
0.76699029 0.82524272 0.72815534 0.73786408]
|
|
|
|
mean value: 0.7602608033504664
|
|
|
|
key: test_roc_auc
|
|
value: [0.61363636 0.61904762 0.49404762 0.63095238 0.66071429 0.67532468
|
|
0.48701299 0.34415584 0.41558442 0.72077922]
|
|
|
|
mean value: 0.5661255411255411
|
|
|
|
key: train_roc_auc
|
|
value: [0.69610109 0.6908701 0.69485294 0.68321078 0.7153799 0.6873483
|
|
0.71943265 0.74074636 0.66876517 0.68924454]
|
|
|
|
mean value: 0.6985951834212649
|
|
|
|
key: test_jcc
|
|
value: [0.53333333 0.53333333 0.33333333 0.625 0.6 0.53846154
|
|
0.4 0.35294118 0.375 0.61538462]
|
|
|
|
mean value: 0.4906787330316742
|
|
|
|
key: train_jcc
|
|
value: [0.61904762 0.59677419 0.61904762 0.63076923 0.63709677 0.592
|
|
0.63709677 0.68 0.5859375 0.6031746 ]
|
|
|
|
mean value: 0.6200944313974556
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.09132409 0.05846572 0.06459308 0.05377865 0.05312014 0.0551908
|
|
0.05610585 0.05561304 0.05613637 0.05491066]
|
|
|
|
mean value: 0.059923839569091794
|
|
|
|
key: score_time
|
|
value: [0.01047754 0.01103997 0.01097846 0.0104785 0.01049376 0.01075339
|
|
0.01066589 0.01044464 0.01035118 0.01046562]
|
|
|
|
mean value: 0.010614895820617675
|
|
|
|
key: test_mcc
|
|
value: [0.45361105 0.88949918 0.89559105 1. 0.7824608 0.76623377
|
|
0.89188259 0.39594419 0.88640526 0.43320011]
|
|
|
|
mean value: 0.7394827993424129
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.73684211 0.94736842 0.94736842 1. 0.89473684 0.88888889
|
|
0.94444444 0.72222222 0.94444444 0.72222222]
|
|
|
|
mean value: 0.8748538011695907
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.96 0.95652174 1. 0.92307692 0.90909091
|
|
0.95238095 0.8 0.95652174 0.76190476]
|
|
|
|
mean value: 0.900210572036659
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.92307692 1. 1. 0.85714286 0.90909091
|
|
1. 0.71428571 0.91666667 0.8 ]
|
|
|
|
mean value: 0.887026307026307
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.81818182 1. 0.91666667 1. 1. 0.90909091
|
|
0.90909091 0.90909091 1. 0.72727273]
|
|
|
|
mean value: 0.918939393939394
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.72159091 0.92857143 0.95833333 1. 0.85714286 0.88311688
|
|
0.95454545 0.66883117 0.92857143 0.72077922]
|
|
|
|
mean value: 0.8621482683982684
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.92307692 0.91666667 1. 0.85714286 0.83333333
|
|
0.90909091 0.66666667 0.91666667 0.61538462]
|
|
|
|
mean value: 0.828088578088578
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.53
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04992199 0.06900644 0.06069613 0.06046605 0.05153871 0.09395623
|
|
0.06998301 0.05825424 0.024194 0.04331779]
|
|
|
|
mean value: 0.05813345909118652
|
|
|
|
key: score_time
|
|
value: [0.02851057 0.03522396 0.02049589 0.02068567 0.02075195 0.01262808
|
|
0.02250338 0.01193881 0.01193452 0.0223732 ]
|
|
|
|
mean value: 0.02070460319519043
|
|
|
|
key: test_mcc
|
|
value: [0.45868247 0.36803496 0.43034895 0.42004128 0.77380952 0.88640526
|
|
0.48416483 0.4025974 0.12182898 0.48416483]
|
|
|
|
mean value: 0.4830078495368003
|
|
|
|
key: train_mcc
|
|
value: [0.96182348 0.94915491 0.92371324 0.96223327 0.93656134 0.89835373
|
|
0.97466626 0.9748321 0.96196428 0.96196428]
|
|
|
|
mean value: 0.9505266878223336
|
|
|
|
key: test_accuracy
|
|
value: [0.73684211 0.68421053 0.68421053 0.73684211 0.89473684 0.94444444
|
|
0.72222222 0.66666667 0.61111111 0.72222222]
|
|
|
|
mean value: 0.7403508771929824
|
|
|
|
key: train_accuracy
|
|
value: [0.98192771 0.97590361 0.96385542 0.98192771 0.96987952 0.95209581
|
|
0.98802395 0.98802395 0.98203593 0.98203593]
|
|
|
|
mean value: 0.976570954476589
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.72727273 0.7 0.8 0.91666667 0.95652174
|
|
0.73684211 0.66666667 0.72 0.73684211]
|
|
|
|
mean value: 0.7760812010262811
|
|
|
|
key: train_fscore
|
|
value: [0.98536585 0.98058252 0.97058824 0.98550725 0.97584541 0.96153846
|
|
0.99029126 0.99038462 0.98550725 0.98550725]
|
|
|
|
mean value: 0.9811118102041951
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.8 0.875 0.76923077 0.91666667 0.91666667
|
|
0.875 0.85714286 0.64285714 0.875 ]
|
|
|
|
mean value: 0.8241849816849817
|
|
|
|
key: train_precision
|
|
value: [0.99019608 0.97115385 0.97058824 0.97142857 0.96190476 0.95238095
|
|
0.99029126 0.98095238 0.98076923 0.98076923]
|
|
|
|
mean value: 0.9750434550220387
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.66666667 0.58333333 0.83333333 0.91666667 1.
|
|
0.63636364 0.54545455 0.81818182 0.63636364]
|
|
|
|
mean value: 0.7545454545454545
|
|
|
|
key: train_recall
|
|
value: [0.98058252 0.99019608 0.97058824 1. 0.99019608 0.97087379
|
|
0.99029126 1. 0.99029126 0.99029126]
|
|
|
|
mean value: 0.9873310489244241
|
|
|
|
key: test_roc_auc
|
|
value: [0.70454545 0.69047619 0.7202381 0.70238095 0.88690476 0.92857143
|
|
0.74675325 0.7012987 0.55194805 0.74675325]
|
|
|
|
mean value: 0.737987012987013
|
|
|
|
key: train_roc_auc
|
|
value: [0.98235475 0.97166054 0.96185662 0.9765625 0.96384804 0.94637439
|
|
0.98733313 0.984375 0.97952063 0.97952063]
|
|
|
|
mean value: 0.9733406236685613
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.57142857 0.53846154 0.66666667 0.84615385 0.91666667
|
|
0.58333333 0.5 0.5625 0.58333333]
|
|
|
|
mean value: 0.6435210622710623
|
|
|
|
key: train_jcc
|
|
value: [0.97115385 0.96190476 0.94285714 0.97142857 0.95283019 0.92592593
|
|
0.98076923 0.98095238 0.97142857 0.97142857]
|
|
|
|
mean value: 0.9630679191528249
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0232811 0.00937867 0.00896811 0.00889492 0.00896215 0.00895619
|
|
0.00900936 0.01012516 0.01084137 0.00878739]
|
|
|
|
mean value: 0.010720443725585938
|
|
|
|
key: score_time
|
|
value: [0.00920415 0.00897765 0.0090692 0.0088675 0.0088439 0.00873423
|
|
0.00930619 0.00982428 0.00979805 0.00853658]
|
|
|
|
mean value: 0.009116172790527344
|
|
|
|
key: test_mcc
|
|
value: [ 0.34405118 0.18531233 -0.04941662 0.42004128 0.14085904 0.26856633
|
|
0.2987013 0.16116459 0.0805823 0.26856633]
|
|
|
|
mean value: 0.2118428048944046
|
|
|
|
key: train_mcc
|
|
value: [0.37947231 0.36682397 0.37021128 0.40845955 0.39898595 0.3183612
|
|
0.34304366 0.3576444 0.42468968 0.34769188]
|
|
|
|
mean value: 0.37153838914990045
|
|
|
|
key: test_accuracy
|
|
value: [0.68421053 0.63157895 0.52631579 0.73684211 0.63157895 0.66666667
|
|
0.66666667 0.61111111 0.61111111 0.66666667]
|
|
|
|
mean value: 0.6432748538011696
|
|
|
|
key: train_accuracy
|
|
value: [0.71686747 0.71084337 0.71084337 0.72891566 0.72289157 0.68862275
|
|
0.7005988 0.70658683 0.73652695 0.7005988 ]
|
|
|
|
mean value: 0.7123295577519659
|
|
|
|
key: test_fscore
|
|
value: [0.76923077 0.72 0.64 0.8 0.74074074 0.75
|
|
0.72727273 0.69565217 0.74074074 0.75 ]
|
|
|
|
mean value: 0.7333637151898021
|
|
|
|
key: train_fscore
|
|
value: [0.78538813 0.78378378 0.77981651 0.80519481 0.78703704 0.76363636
|
|
0.77477477 0.77828054 0.8018018 0.7706422 ]
|
|
|
|
mean value: 0.7830355952665203
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.69230769 0.61538462 0.76923077 0.66666667 0.69230769
|
|
0.72727273 0.66666667 0.625 0.69230769]
|
|
|
|
mean value: 0.6813811188811189
|
|
|
|
key: train_precision
|
|
value: [0.74137931 0.725 0.73275862 0.72093023 0.74561404 0.71794872
|
|
0.72268908 0.72881356 0.74789916 0.73043478]
|
|
|
|
mean value: 0.7313467493853907
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.75 0.66666667 0.83333333 0.83333333 0.81818182
|
|
0.72727273 0.72727273 0.90909091 0.81818182]
|
|
|
|
mean value: 0.7992424242424243
|
|
|
|
key: train_recall
|
|
value: [0.83495146 0.85294118 0.83333333 0.91176471 0.83333333 0.81553398
|
|
0.83495146 0.83495146 0.86407767 0.81553398]
|
|
|
|
mean value: 0.8431372549019608
|
|
|
|
key: test_roc_auc
|
|
value: [0.64204545 0.58928571 0.47619048 0.70238095 0.55952381 0.62337662
|
|
0.64935065 0.57792208 0.52597403 0.62337662]
|
|
|
|
mean value: 0.5969426406926407
|
|
|
|
key: train_roc_auc
|
|
value: [0.67938049 0.66865809 0.67447917 0.67463235 0.69010417 0.64995449
|
|
0.65966323 0.66747573 0.69766383 0.66557949]
|
|
|
|
mean value: 0.6727591036414566
|
|
|
|
key: test_jcc
|
|
value: [0.625 0.5625 0.47058824 0.66666667 0.58823529 0.6
|
|
0.57142857 0.53333333 0.58823529 0.6 ]
|
|
|
|
mean value: 0.5805987394957983
|
|
|
|
key: train_jcc
|
|
value: [0.64661654 0.64444444 0.63909774 0.67391304 0.64885496 0.61764706
|
|
0.63235294 0.63703704 0.66917293 0.62686567]
|
|
|
|
mean value: 0.6436002376478708
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01141 0.01607895 0.01427817 0.01620102 0.01566148 0.01515269
|
|
0.01575112 0.01580167 0.01595831 0.01498699]
|
|
|
|
mean value: 0.015128040313720703
|
|
|
|
key: score_time
|
|
value: [0.00860381 0.01093388 0.01091313 0.01146984 0.01156068 0.01149845
|
|
0.0115273 0.01151919 0.01151872 0.01149392]
|
|
|
|
mean value: 0.01110389232635498
|
|
|
|
key: test_mcc
|
|
value: [0.35227273 0.7824608 0.36803496 0.58655573 0.40849122 0.76623377
|
|
0.56061191 0.2548236 0.40291148 0.32232919]
|
|
|
|
mean value: 0.48047253774078613
|
|
|
|
key: train_mcc
|
|
value: [0.87956612 0.94974006 0.81149011 0.84765971 0.81698712 0.8872319
|
|
0.54476067 0.91320801 0.83195371 0.74686754]
|
|
|
|
mean value: 0.8229464950111158
|
|
|
|
key: test_accuracy
|
|
value: [0.68421053 0.89473684 0.68421053 0.78947368 0.73684211 0.88888889
|
|
0.77777778 0.66666667 0.72222222 0.61111111]
|
|
|
|
mean value: 0.7456140350877193
|
|
|
|
key: train_accuracy
|
|
value: [0.93975904 0.97590361 0.90963855 0.92168675 0.90963855 0.94610778
|
|
0.77245509 0.95808383 0.91017964 0.85628743]
|
|
|
|
mean value: 0.9099740278479186
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.92307692 0.72727273 0.81818182 0.81481481 0.90909091
|
|
0.84615385 0.76923077 0.7826087 0.58823529]
|
|
|
|
mean value: 0.7905938524864355
|
|
|
|
key: train_fscore
|
|
value: [0.94949495 0.98019802 0.93023256 0.93264249 0.93150685 0.95774648
|
|
0.8442623 0.96713615 0.92146597 0.86813187]
|
|
|
|
mean value: 0.9282817624706369
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.85714286 0.8 0.9 0.73333333 0.90909091
|
|
0.73333333 0.66666667 0.75 0.83333333]
|
|
|
|
mean value: 0.791017316017316
|
|
|
|
key: train_precision
|
|
value: [0.98947368 0.99 0.88495575 0.98901099 0.87179487 0.92727273
|
|
0.73049645 0.93636364 1. 1. ]
|
|
|
|
mean value: 0.9319368114765849
|
|
|
|
key: test_recall
|
|
value: [0.72727273 1. 0.66666667 0.75 0.91666667 0.90909091
|
|
1. 0.90909091 0.81818182 0.45454545]
|
|
|
|
mean value: 0.8151515151515152
|
|
|
|
key: train_recall
|
|
value: [0.91262136 0.97058824 0.98039216 0.88235294 1. 0.99029126
|
|
1. 1. 0.85436893 0.76699029]
|
|
|
|
mean value: 0.9357605177993528
|
|
|
|
key: test_roc_auc
|
|
value: [0.67613636 0.85714286 0.69047619 0.80357143 0.67261905 0.88311688
|
|
0.71428571 0.5974026 0.69480519 0.65584416]
|
|
|
|
mean value: 0.7245400432900433
|
|
|
|
key: train_roc_auc
|
|
value: [0.94837417 0.97748162 0.88863358 0.93336397 0.8828125 0.93264563
|
|
0.703125 0.9453125 0.92718447 0.88349515]
|
|
|
|
mean value: 0.9022428581060256
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.85714286 0.57142857 0.69230769 0.6875 0.83333333
|
|
0.73333333 0.625 0.64285714 0.41666667]
|
|
|
|
mean value: 0.6630998168498169
|
|
|
|
key: train_jcc
|
|
value: [0.90384615 0.96116505 0.86956522 0.87378641 0.87179487 0.91891892
|
|
0.73049645 0.93636364 0.85436893 0.76699029]
|
|
|
|
mean value: 0.8687295931827245
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01471901 0.01381922 0.01489019 0.01392627 0.01413941 0.01386118
|
|
0.01413894 0.01493621 0.01305819 0.01561236]
|
|
|
|
mean value: 0.014310097694396973
|
|
|
|
key: score_time
|
|
value: [0.01162362 0.01153278 0.01147699 0.01158118 0.01145577 0.01147771
|
|
0.01151061 0.01157641 0.0116725 0.01168513]
|
|
|
|
mean value: 0.011559271812438964
|
|
|
|
key: test_mcc
|
|
value: [0.29545455 0.65133895 0.53468154 0.3086067 0.65477023 0.66254135
|
|
0. 0.3040345 0.2987013 0.2548236 ]
|
|
|
|
mean value: 0.39649527059477535
|
|
|
|
key: train_mcc
|
|
value: [0.76345722 0.73678413 0.61692545 0.46724931 0.91088941 0.80279484
|
|
0.28456079 0.54476067 0.64944256 0.95111825]
|
|
|
|
mean value: 0.6727982631270347
|
|
|
|
key: test_accuracy
|
|
value: [0.63157895 0.78947368 0.78947368 0.68421053 0.84210526 0.83333333
|
|
0.61111111 0.66666667 0.66666667 0.61111111]
|
|
|
|
mean value: 0.7125730994152046
|
|
|
|
key: train_accuracy
|
|
value: [0.86746988 0.84939759 0.80722892 0.73493976 0.95783133 0.89820359
|
|
0.66467066 0.77245509 0.82035928 0.9760479 ]
|
|
|
|
mean value: 0.8348603996825626
|
|
|
|
key: test_fscore
|
|
value: [0.63157895 0.8 0.84615385 0.8 0.88 0.85714286
|
|
0.75862069 0.78571429 0.72727273 0.63157895]
|
|
|
|
mean value: 0.7718062300675731
|
|
|
|
key: train_fscore
|
|
value: [0.88043478 0.8603352 0.86440678 0.82258065 0.96618357 0.9119171
|
|
0.78625954 0.8442623 0.84210526 0.98019802]
|
|
|
|
mean value: 0.8758683196313127
|
|
|
|
key: test_precision
|
|
value: [0.75 1. 0.78571429 0.66666667 0.84615385 0.9
|
|
0.61111111 0.64705882 0.72727273 0.75 ]
|
|
|
|
mean value: 0.7683977460448048
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.76119403 0.69863014 0.95238095 0.97777778
|
|
0.64779874 0.73049645 0.91954023 1. ]
|
|
|
|
mean value: 0.8687818322919909
|
|
|
|
key: test_recall
|
|
value: [0.54545455 0.66666667 0.91666667 1. 0.91666667 0.81818182
|
|
1. 1. 0.72727273 0.54545455]
|
|
|
|
mean value: 0.8136363636363636
|
|
|
|
key: train_recall
|
|
value: [0.78640777 0.75490196 1. 1. 0.98039216 0.85436893
|
|
1. 1. 0.77669903 0.96116505]
|
|
|
|
mean value: 0.9113934894346087
|
|
|
|
key: test_roc_auc
|
|
value: [0.64772727 0.83333333 0.74404762 0.57142857 0.81547619 0.83766234
|
|
0.5 0.57142857 0.64935065 0.62987013]
|
|
|
|
mean value: 0.6800324675324675
|
|
|
|
key: train_roc_auc
|
|
value: [0.89320388 0.87745098 0.75 0.65625 0.95113358 0.91155947
|
|
0.5625 0.703125 0.83366201 0.98058252]
|
|
|
|
mean value: 0.8119467447173044
|
|
|
|
key: test_jcc
|
|
value: [0.46153846 0.66666667 0.73333333 0.66666667 0.78571429 0.75
|
|
0.61111111 0.64705882 0.57142857 0.46153846]
|
|
|
|
mean value: 0.635505638152697
|
|
|
|
key: train_jcc
|
|
value: [0.78640777 0.75490196 0.76119403 0.69863014 0.93457944 0.83809524
|
|
0.64779874 0.73049645 0.72727273 0.96116505]
|
|
|
|
mean value: 0.7840541543814717
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.12054205 0.10526896 0.10383749 0.10514784 0.10670519 0.10818172
|
|
0.11293268 0.11231232 0.10736513 0.10779858]
|
|
|
|
mean value: 0.10900919437408448
|
|
|
|
key: score_time
|
|
value: [0.01530385 0.0149107 0.01513171 0.01525569 0.01502442 0.01519895
|
|
0.0160737 0.01602936 0.0148201 0.01555538]
|
|
|
|
mean value: 0.0153303861618042
|
|
|
|
key: test_mcc
|
|
value: [0.56729535 0.67460105 1. 0.89559105 0.67460105 0.76623377
|
|
0.89188259 0.52299758 0.77742884 0.53246753]
|
|
|
|
mean value: 0.7303098813708148
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.84210526 1. 0.94736842 0.84210526 0.88888889
|
|
0.94444444 0.77777778 0.88888889 0.77777778]
|
|
|
|
mean value: 0.8698830409356725
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.86956522 1. 0.95652174 0.86956522 0.90909091
|
|
0.95238095 0.83333333 0.91666667 0.81818182]
|
|
|
|
mean value: 0.8958639186900056
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.76923077 0.90909091 1. 1. 0.90909091 0.90909091
|
|
1. 0.76923077 0.84615385 0.81818182]
|
|
|
|
mean value: 0.893006993006993
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.83333333 1. 0.91666667 0.83333333 0.90909091
|
|
0.90909091 0.90909091 1. 0.81818182]
|
|
|
|
mean value: 0.9037878787878788
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.76704545 0.8452381 1. 0.95833333 0.8452381 0.88311688
|
|
0.95454545 0.74025974 0.85714286 0.76623377]
|
|
|
|
mean value: 0.861715367965368
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.76923077 1. 0.91666667 0.76923077 0.83333333
|
|
0.90909091 0.71428571 0.84615385 0.69230769]
|
|
|
|
mean value: 0.8164585414585415
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04221368 0.03988671 0.04898667 0.03659725 0.04938769 0.03940272
|
|
0.03929019 0.05242944 0.05490804 0.03600407]
|
|
|
|
mean value: 0.043910646438598634
|
|
|
|
key: score_time
|
|
value: [0.01941323 0.02854228 0.03419876 0.02406669 0.01860428 0.01948833
|
|
0.02925777 0.02827168 0.01718235 0.01625252]
|
|
|
|
mean value: 0.02352778911590576
|
|
|
|
key: test_mcc
|
|
value: [0.56729535 1. 0.80507649 0.89559105 1. 0.66254135
|
|
0.79772404 0.56061191 0.88640526 0.53246753]
|
|
|
|
mean value: 0.7707712971733849
|
|
|
|
key: train_mcc
|
|
value: [1. 0.97457108 0.96182348 0.9873287 0.96204463 1.
|
|
0.98744925 0.97466626 0.98744925 0.94933931]
|
|
|
|
mean value: 0.9784671941115295
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 1. 0.89473684 0.94736842 1. 0.83333333
|
|
0.88888889 0.77777778 0.94444444 0.77777778]
|
|
|
|
mean value: 0.8853801169590643
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98795181 0.98192771 0.9939759 0.98192771 1.
|
|
0.99401198 0.98802395 0.99401198 0.9760479 ]
|
|
|
|
mean value: 0.9897878940913354
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 1. 0.90909091 0.95652174 1. 0.85714286
|
|
0.9 0.84615385 0.95652174 0.81818182]
|
|
|
|
mean value: 0.9076946242163634
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99019608 0.98536585 0.99512195 0.98522167 1.
|
|
0.99512195 0.99029126 0.99512195 0.98076923]
|
|
|
|
mean value: 0.9917209953530446
|
|
|
|
key: test_precision
|
|
value: [0.76923077 1. 1. 1. 1. 0.9
|
|
1. 0.73333333 0.91666667 0.81818182]
|
|
|
|
mean value: 0.9137412587412588
|
|
|
|
key: train_precision
|
|
value: [1. 0.99019608 0.98058252 0.99029126 0.99009901 1.
|
|
1. 0.99029126 1. 0.97142857]
|
|
|
|
mean value: 0.9912888708304624
|
|
|
|
key: test_recall
|
|
value: [0.90909091 1. 0.83333333 0.91666667 1. 0.81818182
|
|
0.81818182 1. 1. 0.81818182]
|
|
|
|
mean value: 0.9113636363636364
|
|
|
|
key: train_recall
|
|
value: [1. 0.99019608 0.99019608 1. 0.98039216 1.
|
|
0.99029126 0.99029126 0.99029126 0.99029126]
|
|
|
|
mean value: 0.992194936226918
|
|
|
|
key: test_roc_auc
|
|
value: [0.76704545 1. 0.91666667 0.95833333 1. 0.83766234
|
|
0.90909091 0.71428571 0.92857143 0.76623377]
|
|
|
|
mean value: 0.8797889610389611
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98728554 0.97947304 0.9921875 0.98238358 1.
|
|
0.99514563 0.98733313 0.99514563 0.97170813]
|
|
|
|
mean value: 0.989066218113459
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 1. 0.83333333 0.91666667 1. 0.75
|
|
0.81818182 0.73333333 0.91666667 0.69230769]
|
|
|
|
mean value: 0.8374775224775225
|
|
|
|
key: train_jcc
|
|
value: [1. 0.98058252 0.97115385 0.99029126 0.97087379 1.
|
|
0.99029126 0.98076923 0.99029126 0.96226415]
|
|
|
|
mean value: 0.9836517324953852
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03705645 0.05874968 0.07011271 0.0530026 0.05404568 0.05509114
|
|
0.02280903 0.02378893 0.02255702 0.03889585]
|
|
|
|
mean value: 0.04361090660095215
|
|
|
|
key: score_time
|
|
value: [0.02256751 0.02284932 0.02406359 0.02447772 0.02279687 0.02272964
|
|
0.01286435 0.0128026 0.01270914 0.03028536]
|
|
|
|
mean value: 0.02081460952758789
|
|
|
|
key: test_mcc
|
|
value: [ 0.56729535 0.14085904 0.0952381 -0.03149704 -0.12677314 0.01413507
|
|
0.12182898 0.39594419 0.01413507 0.01413507]
|
|
|
|
mean value: 0.12053006836854342
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.63157895 0.57894737 0.57894737 0.52631579 0.55555556
|
|
0.61111111 0.72222222 0.55555556 0.55555556]
|
|
|
|
mean value: 0.6105263157894737
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.74074074 0.66666667 0.71428571 0.66666667 0.66666667
|
|
0.72 0.8 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7141693121693122
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.76923077 0.66666667 0.66666667 0.625 0.6 0.61538462
|
|
0.64285714 0.71428571 0.61538462 0.61538462]
|
|
|
|
mean value: 0.6530860805860806
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.83333333 0.66666667 0.83333333 0.75 0.72727273
|
|
0.81818182 0.90909091 0.72727273 0.72727273]
|
|
|
|
mean value: 0.7901515151515152
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.76704545 0.55952381 0.54761905 0.48809524 0.44642857 0.50649351
|
|
0.55194805 0.66883117 0.50649351 0.50649351]
|
|
|
|
mean value: 0.5548971861471862
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.58823529 0.5 0.55555556 0.5 0.5
|
|
0.5625 0.66666667 0.5 0.5 ]
|
|
|
|
mean value: 0.5587243230625584
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.32965541 0.33532381 0.32610035 0.33066726 0.31862354 0.31567144
|
|
0.32750487 0.32567739 0.32546377 0.32319188]
|
|
|
|
mean value: 0.3257879734039307
|
|
|
|
key: score_time
|
|
value: [0.00963497 0.00933599 0.00911212 0.00913239 0.00947118 0.00982332
|
|
0.01011229 0.0100255 0.01008368 0.01003385]
|
|
|
|
mean value: 0.009676527976989747
|
|
|
|
key: test_mcc
|
|
value: [0.56818182 0.77380952 0.89559105 1. 0.7824608 0.76623377
|
|
1. 0.39594419 0.88640526 0.64465837]
|
|
|
|
mean value: 0.7713284776335373
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.78947368 0.89473684 0.94736842 1. 0.89473684 0.88888889
|
|
1. 0.72222222 0.94444444 0.83333333]
|
|
|
|
mean value: 0.8915204678362573
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.91666667 0.95652174 1. 0.92307692 0.90909091
|
|
1. 0.8 0.95652174 0.86956522]
|
|
|
|
mean value: 0.9149625012668491
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.91666667 1. 1. 0.85714286 0.90909091
|
|
1. 0.71428571 0.91666667 0.83333333]
|
|
|
|
mean value: 0.8965367965367965
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.91666667 0.91666667 1. 1. 0.90909091
|
|
1. 0.90909091 1. 0.90909091]
|
|
|
|
mean value: 0.9378787878787879
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.78409091 0.88690476 0.95833333 1. 0.85714286 0.88311688
|
|
1. 0.66883117 0.92857143 0.81168831]
|
|
|
|
mean value: 0.8778679653679654
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.84615385 0.91666667 1. 0.85714286 0.83333333
|
|
1. 0.66666667 0.91666667 0.76923077]
|
|
|
|
mean value: 0.8498168498168498
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01851869 0.01976252 0.01977897 0.02018905 0.01982856 0.02080202
|
|
0.02046824 0.02414322 0.02400231 0.02459073]
|
|
|
|
mean value: 0.021208429336547853
|
|
|
|
key: score_time
|
|
value: [0.0122931 0.01221442 0.01403546 0.01435971 0.01452565 0.01226997
|
|
0.01526237 0.01487613 0.01891351 0.02681375]
|
|
|
|
mean value: 0.015556406974792481
|
|
|
|
key: test_mcc
|
|
value: [-0.20100756 0.18531233 -0.01163105 0.09356015 0.09356015 -0.1934765
|
|
0.2987013 0.3040345 -0.24029619 0.2987013 ]
|
|
|
|
mean value: 0.0627458419060211
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.97474109 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9974741089883715
|
|
|
|
key: test_accuracy
|
|
value: [0.52631579 0.63157895 0.47368421 0.63157895 0.63157895 0.55555556
|
|
0.66666667 0.66666667 0.44444444 0.66666667]
|
|
|
|
mean value: 0.5894736842105263
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.98795181 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9987951807228915
|
|
|
|
key: test_fscore
|
|
value: [0.68965517 0.72 0.5 0.75862069 0.75862069 0.71428571
|
|
0.72727273 0.78571429 0.58333333 0.72727273]
|
|
|
|
mean value: 0.6964775339602925
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.99029126 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9990291262135922
|
|
|
|
key: test_precision
|
|
value: [0.55555556 0.69230769 0.625 0.64705882 0.64705882 0.58823529
|
|
0.72727273 0.64705882 0.53846154 0.72727273]
|
|
|
|
mean value: 0.6395282005576124
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.98076923 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9980769230769231
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.75 0.41666667 0.91666667 0.91666667 0.90909091
|
|
0.72727273 1. 0.63636364 0.72727273]
|
|
|
|
mean value: 0.7909090909090909
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.45454545 0.58928571 0.49404762 0.5297619 0.5297619 0.45454545
|
|
0.64935065 0.57142857 0.38961039 0.64935065]
|
|
|
|
mean value: 0.5311688311688312
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.984375 1. 1. 1. 1. 1.
|
|
1. 1. ]
|
|
|
|
mean value: 0.9984375
|
|
|
|
key: test_jcc
|
|
value: [0.52631579 0.5625 0.33333333 0.61111111 0.61111111 0.55555556
|
|
0.57142857 0.64705882 0.41176471 0.57142857]
|
|
|
|
mean value: 0.5401607572853703
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.98076923 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9980769230769231
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03033352 0.0397439 0.03561759 0.03916883 0.03715229 0.03341866
|
|
0.03735018 0.05201197 0.05242395 0.05699086]
|
|
|
|
mean value: 0.04142117500305176
|
|
|
|
key: score_time
|
|
value: [0.02387834 0.0204978 0.02155447 0.02060032 0.02355218 0.02056217
|
|
0.02346349 0.02178597 0.02437901 0.02108812]
|
|
|
|
mean value: 0.02213618755340576
|
|
|
|
key: test_mcc
|
|
value: [0.10863102 0.42004128 0.67460105 0.77380952 0.77380952 0.76623377
|
|
0.56407607 0.40291148 0.44320263 0.53246753]
|
|
|
|
mean value: 0.5459783889001629
|
|
|
|
key: train_mcc
|
|
value: [0.92308458 0.93744159 0.89919089 0.91088941 0.89798254 0.91188694
|
|
0.92430455 0.92539974 0.94997541 0.91188694]
|
|
|
|
mean value: 0.919204259351807
|
|
|
|
key: test_accuracy
|
|
value: [0.57894737 0.73684211 0.84210526 0.89473684 0.89473684 0.88888889
|
|
0.72222222 0.72222222 0.72222222 0.77777778]
|
|
|
|
mean value: 0.7780701754385965
|
|
|
|
key: train_accuracy
|
|
value: [0.96385542 0.96987952 0.95180723 0.95783133 0.95180723 0.95808383
|
|
0.96407186 0.96407186 0.9760479 0.95808383]
|
|
|
|
mean value: 0.9615540004328692
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.8 0.86956522 0.91666667 0.91666667 0.90909091
|
|
0.70588235 0.7826087 0.81481481 0.81818182]
|
|
|
|
mean value: 0.8200143808072197
|
|
|
|
key: train_fscore
|
|
value: [0.97115385 0.97607656 0.96190476 0.96618357 0.96116505 0.96682464
|
|
0.97142857 0.97169811 0.98095238 0.96682464]
|
|
|
|
mean value: 0.9694212141193473
|
|
|
|
key: test_precision
|
|
value: [0.61538462 0.76923077 0.90909091 0.91666667 0.91666667 0.90909091
|
|
1. 0.75 0.6875 0.81818182]
|
|
|
|
mean value: 0.8291812354312355
|
|
|
|
key: train_precision
|
|
value: [0.96190476 0.95327103 0.93518519 0.95238095 0.95192308 0.94444444
|
|
0.95327103 0.94495413 0.96261682 0.94444444]
|
|
|
|
mean value: 0.9504395872227905
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.83333333 0.83333333 0.91666667 0.91666667 0.90909091
|
|
0.54545455 0.81818182 1. 0.81818182]
|
|
|
|
mean value: 0.8318181818181818
|
|
|
|
key: train_recall
|
|
value: [0.98058252 1. 0.99019608 0.98039216 0.97058824 0.99029126
|
|
0.99029126 1. 1. 0.99029126]
|
|
|
|
mean value: 0.9892632781267847
|
|
|
|
key: test_roc_auc
|
|
value: [0.55113636 0.70238095 0.8452381 0.88690476 0.88690476 0.88311688
|
|
0.77272727 0.69480519 0.64285714 0.76623377]
|
|
|
|
mean value: 0.7632305194805196
|
|
|
|
key: train_roc_auc
|
|
value: [0.95854523 0.9609375 0.94041054 0.95113358 0.94623162 0.94827063
|
|
0.95608313 0.953125 0.96875 0.94827063]
|
|
|
|
mean value: 0.9531757858887892
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.66666667 0.76923077 0.84615385 0.84615385 0.83333333
|
|
0.54545455 0.64285714 0.6875 0.69230769]
|
|
|
|
mean value: 0.7029657842157843
|
|
|
|
key: train_jcc
|
|
value: [0.94392523 0.95327103 0.9266055 0.93457944 0.92523364 0.93577982
|
|
0.94444444 0.94495413 0.96261682 0.93577982]
|
|
|
|
mean value: 0.940718987872379
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.37415504 0.30020595 0.36566615 0.33783174 0.32791495 0.40239978
|
|
0.32688546 0.32808876 0.33113956 0.33140039]
|
|
|
|
mean value: 0.34256877899169924
|
|
|
|
key: score_time
|
|
value: [0.02503872 0.02488184 0.02039814 0.02254295 0.01626158 0.02357626
|
|
0.023417 0.02344203 0.02036166 0.02339745]
|
|
|
|
mean value: 0.022331762313842773
|
|
|
|
key: test_mcc
|
|
value: [0.10863102 0.54761905 0.67460105 0.77380952 0.77380952 0.76623377
|
|
0.56407607 0.40291148 0.39594419 0.71350607]
|
|
|
|
mean value: 0.5721141750028133
|
|
|
|
key: train_mcc
|
|
value: [0.92308458 0.92403878 0.89919089 0.91088941 0.89798254 0.91188694
|
|
0.92430455 0.92539974 0.94933931 0.94933931]
|
|
|
|
mean value: 0.9215456054544839
|
|
|
|
key: test_accuracy
|
|
value: [0.57894737 0.78947368 0.84210526 0.89473684 0.89473684 0.88888889
|
|
0.72222222 0.72222222 0.72222222 0.83333333]
|
|
|
|
mean value: 0.7888888888888889
|
|
|
|
key: train_accuracy
|
|
value: [0.96385542 0.96385542 0.95180723 0.95783133 0.95180723 0.95808383
|
|
0.96407186 0.96407186 0.9760479 0.9760479 ]
|
|
|
|
mean value: 0.9627479979799437
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.83333333 0.86956522 0.91666667 0.91666667 0.90909091
|
|
0.70588235 0.7826087 0.8 0.84210526]
|
|
|
|
mean value: 0.8242585771566792
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_orig.py:114: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_orig.py:117: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.97115385 0.97115385 0.96190476 0.96618357 0.96116505 0.96682464
|
|
0.97142857 0.97169811 0.98076923 0.98076923]
|
|
|
|
mean value: 0.9703050868359714
|
|
|
|
key: test_precision
|
|
value: [0.61538462 0.83333333 0.90909091 0.91666667 0.91666667 0.90909091
|
|
1. 0.75 0.71428571 1. ]
|
|
|
|
mean value: 0.8564518814518814
|
|
|
|
key: train_precision
|
|
value: [0.96190476 0.95283019 0.93518519 0.95238095 0.95192308 0.94444444
|
|
0.95327103 0.94495413 0.97142857 0.97142857]
|
|
|
|
mean value: 0.9539750908852559
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.83333333 0.83333333 0.91666667 0.91666667 0.90909091
|
|
0.54545455 0.81818182 0.90909091 0.72727273]
|
|
|
|
mean value: 0.8136363636363636
|
|
|
|
key: train_recall
|
|
value: [0.98058252 0.99019608 0.99019608 0.98039216 0.97058824 0.99029126
|
|
0.99029126 1. 0.99029126 0.99029126]
|
|
|
|
mean value: 0.9873120121835142
|
|
|
|
key: test_roc_auc
|
|
value: [0.55113636 0.77380952 0.8452381 0.88690476 0.88690476 0.88311688
|
|
0.77272727 0.69480519 0.66883117 0.86363636]
|
|
|
|
mean value: 0.782711038961039
|
|
|
|
key: train_roc_auc
|
|
value: [0.95854523 0.95603554 0.94041054 0.95113358 0.94623162 0.94827063
|
|
0.95608313 0.953125 0.97170813 0.97170813]
|
|
|
|
mean value: 0.9553251529171539
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.71428571 0.76923077 0.84615385 0.84615385 0.83333333
|
|
0.54545455 0.64285714 0.66666667 0.72727273]
|
|
|
|
mean value: 0.7091408591408591
|
|
|
|
key: train_jcc
|
|
value: [0.94392523 0.94392523 0.9266055 0.93457944 0.92523364 0.93577982
|
|
0.94444444 0.94495413 0.96226415 0.96226415]
|
|
|
|
mean value: 0.942397574727439
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.57
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03312874 0.06524324 0.10863423 0.13530827 0.03684163 0.03828144
|
|
0.03505182 0.06022382 0.09920526 0.03362274]
|
|
|
|
mean value: 0.06455411911010742
|
|
|
|
key: score_time
|
|
value: [0.01303244 0.01533794 0.0123136 0.01206446 0.01194906 0.01522326
|
|
0.01591754 0.01242018 0.01871157 0.01189852]
|
|
|
|
mean value: 0.013886857032775878
|
|
|
|
key: test_mcc
|
|
value: [0.74047959 0.6992059 0.56818182 0.56490196 0.65151515 0.83971912
|
|
0.74047959 0.66414149 0.45454545 0.37796447]
|
|
|
|
mean value: 0.6301134542242922
|
|
|
|
key: train_mcc
|
|
value: [0.83418999 0.88292404 0.903143 0.85368872 0.85370265 0.88292404
|
|
0.85368872 0.83418999 0.81557242 0.88366175]
|
|
|
|
mean value: 0.8597685325684289
|
|
|
|
key: test_accuracy
|
|
value: [0.86956522 0.82608696 0.7826087 0.7826087 0.82608696 0.91304348
|
|
0.86956522 0.82608696 0.72727273 0.68181818]
|
|
|
|
mean value: 0.8104743083003952
|
|
|
|
key: train_accuracy
|
|
value: [0.91707317 0.94146341 0.95121951 0.92682927 0.92682927 0.94146341
|
|
0.92682927 0.91707317 0.90776699 0.94174757]
|
|
|
|
mean value: 0.9298295050911675
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.84615385 0.7826087 0.76190476 0.83333333 0.90909091
|
|
0.88 0.81818182 0.72727273 0.63157895]
|
|
|
|
mean value: 0.8047267896100848
|
|
|
|
key: train_fscore
|
|
value: [0.91707317 0.94174757 0.95049505 0.92753623 0.92682927 0.94117647
|
|
0.92610837 0.91707317 0.90731707 0.94230769]
|
|
|
|
mean value: 0.9297664074411536
|
|
|
|
key: test_precision
|
|
value: [0.9 0.73333333 0.75 0.8 0.83333333 1.
|
|
0.84615385 0.9 0.72727273 0.75 ]
|
|
|
|
mean value: 0.824009324009324
|
|
|
|
key: train_precision
|
|
value: [0.92156863 0.94174757 0.96969697 0.92307692 0.9223301 0.94117647
|
|
0.93069307 0.91262136 0.91176471 0.93333333]
|
|
|
|
mean value: 0.9308009128461939
|
|
|
|
key: test_recall
|
|
value: [0.81818182 1. 0.81818182 0.72727273 0.83333333 0.83333333
|
|
0.91666667 0.75 0.72727273 0.54545455]
|
|
|
|
mean value: 0.796969696969697
|
|
|
|
key: train_recall
|
|
value: [0.91262136 0.94174757 0.93203883 0.93203883 0.93137255 0.94117647
|
|
0.92156863 0.92156863 0.90291262 0.95145631]
|
|
|
|
mean value: 0.9288501808490387
|
|
|
|
key: test_roc_auc
|
|
value: [0.86742424 0.83333333 0.78409091 0.78030303 0.82575758 0.91666667
|
|
0.86742424 0.82954545 0.72727273 0.68181818]
|
|
|
|
mean value: 0.8113636363636364
|
|
|
|
key: train_roc_auc
|
|
value: [0.91709499 0.94146202 0.95131354 0.92680373 0.92685132 0.94146202
|
|
0.92680373 0.91709499 0.90776699 0.94174757]
|
|
|
|
mean value: 0.9298400913763564
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.73333333 0.64285714 0.61538462 0.71428571 0.83333333
|
|
0.78571429 0.69230769 0.57142857 0.46153846]
|
|
|
|
mean value: 0.680018315018315
|
|
|
|
key: train_jcc
|
|
value: [0.84684685 0.88990826 0.90566038 0.86486486 0.86363636 0.88888889
|
|
0.86238532 0.84684685 0.83035714 0.89090909]
|
|
|
|
mean value: 0.8690304000190187
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.82665825 0.93447065 0.77925324 0.78353238 0.88928652 0.84824395
|
|
0.78165197 0.86581111 0.82213378 0.89065075]
|
|
|
|
mean value: 0.8421692609786987
|
|
|
|
key: score_time
|
|
value: [0.01458549 0.01205873 0.01185203 0.01503658 0.0150156 0.01501513
|
|
0.0152204 0.01511884 0.01503301 0.01509953]
|
|
|
|
mean value: 0.014403533935546876
|
|
|
|
key: test_mcc
|
|
value: [0.65909298 0.63327851 0.56818182 0.65151515 0.74242424 0.91666667
|
|
0.74047959 0.65151515 0.63636364 0.46225016]
|
|
|
|
mean value: 0.6661767909021908
|
|
|
|
key: train_mcc
|
|
value: [1. 0.99029126 0.95126594 1. 0.95126594 0.92194936
|
|
1. 1. 1. 0.9223301 ]
|
|
|
|
mean value: 0.9737102608033504
|
|
|
|
key: test_accuracy
|
|
value: [0.82608696 0.7826087 0.7826087 0.82608696 0.86956522 0.95652174
|
|
0.86956522 0.82608696 0.81818182 0.72727273]
|
|
|
|
mean value: 0.8284584980237154
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.99512195 0.97560976 1. 0.97560976 0.96097561
|
|
1. 1. 1. 0.96116505]
|
|
|
|
mean value: 0.9868482121714421
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.81481481 0.7826087 0.81818182 0.86956522 0.95652174
|
|
0.88 0.83333333 0.81818182 0.7 ]
|
|
|
|
mean value: 0.8273207436685698
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99512195 0.97560976 1. 0.97560976 0.96078431
|
|
1. 1. 1. 0.96116505]
|
|
|
|
mean value: 0.9868290825683814
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.6875 0.75 0.81818182 0.90909091 1.
|
|
0.84615385 0.83333333 0.81818182 0.77777778]
|
|
|
|
mean value: 0.8329108391608392
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.98039216 1. 0.97087379 0.96078431
|
|
1. 1. 1. 0.96116505]
|
|
|
|
mean value: 0.9873215305539692
|
|
|
|
key: test_recall
|
|
value: [0.72727273 1. 0.81818182 0.81818182 0.83333333 0.91666667
|
|
0.91666667 0.83333333 0.81818182 0.63636364]
|
|
|
|
mean value: 0.8318181818181818
|
|
|
|
key: train_recall
|
|
value: [1. 0.99029126 0.97087379 1. 0.98039216 0.96078431
|
|
1. 1. 1. 0.96116505]
|
|
|
|
mean value: 0.9863506567675614
|
|
|
|
key: test_roc_auc
|
|
value: [0.8219697 0.79166667 0.78409091 0.82575758 0.87121212 0.95833333
|
|
0.86742424 0.82575758 0.81818182 0.72727273]
|
|
|
|
mean value: 0.8291666666666666
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99514563 0.97563297 1. 0.97563297 0.96097468
|
|
1. 1. 1. 0.96116505]
|
|
|
|
mean value: 0.9868551304016753
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.6875 0.64285714 0.69230769 0.76923077 0.91666667
|
|
0.78571429 0.71428571 0.69230769 0.53846154]
|
|
|
|
mean value: 0.7105998168498169
|
|
|
|
key: train_jcc
|
|
value: [1. 0.99029126 0.95238095 1. 0.95238095 0.9245283
|
|
1. 1. 1. 0.92523364]
|
|
|
|
mean value: 0.9744815113644433
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01280308 0.01112795 0.00904179 0.00915575 0.00870848 0.0087254
|
|
0.008672 0.00886917 0.00864339 0.0087316 ]
|
|
|
|
mean value: 0.009447860717773437
|
|
|
|
key: score_time
|
|
value: [0.01178265 0.00904846 0.0089066 0.00885105 0.00853443 0.00852323
|
|
0.00856495 0.00855088 0.00860286 0.00863481]
|
|
|
|
mean value: 0.008999991416931152
|
|
|
|
key: test_mcc
|
|
value: [0.41096386 0.44411739 0.38932432 0.15096491 0.38932432 0.3030303
|
|
0.47727273 0.30240737 0.09245003 0.54772256]
|
|
|
|
mean value: 0.35075777936506775
|
|
|
|
key: train_mcc
|
|
value: [0.4448612 0.44400007 0.46806514 0.53843728 0.47567594 0.45607916
|
|
0.45709726 0.49637007 0.42964161 0.50892419]
|
|
|
|
mean value: 0.4719151927110299
|
|
|
|
key: test_accuracy
|
|
value: [0.69565217 0.69565217 0.69565217 0.56521739 0.69565217 0.65217391
|
|
0.73913043 0.65217391 0.54545455 0.77272727]
|
|
|
|
mean value: 0.6709486166007905
|
|
|
|
key: train_accuracy
|
|
value: [0.70243902 0.72195122 0.73170732 0.75609756 0.73658537 0.72682927
|
|
0.72682927 0.74634146 0.71359223 0.75242718]
|
|
|
|
mean value: 0.7314799905280606
|
|
|
|
key: test_fscore
|
|
value: [0.72 0.74074074 0.66666667 0.61538462 0.72 0.66666667
|
|
0.75 0.69230769 0.58333333 0.76190476]
|
|
|
|
mean value: 0.6917004477004477
|
|
|
|
key: train_fscore
|
|
value: [0.75502008 0.72727273 0.75113122 0.78991597 0.74766355 0.73831776
|
|
0.74074074 0.75925926 0.6974359 0.76712329]
|
|
|
|
mean value: 0.747388048921837
|
|
|
|
key: test_precision
|
|
value: [0.64285714 0.625 0.7 0.53333333 0.69230769 0.66666667
|
|
0.75 0.64285714 0.53846154 0.8 ]
|
|
|
|
mean value: 0.6591483516483516
|
|
|
|
key: train_precision
|
|
value: [0.64383562 0.71698113 0.70338983 0.6962963 0.71428571 0.70535714
|
|
0.70175439 0.71929825 0.73913043 0.72413793]
|
|
|
|
mean value: 0.7064466729857495
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.90909091 0.63636364 0.72727273 0.75 0.66666667
|
|
0.75 0.75 0.63636364 0.72727273]
|
|
|
|
mean value: 0.7371212121212121
|
|
|
|
key: train_recall
|
|
value: [0.91262136 0.73786408 0.80582524 0.91262136 0.78431373 0.7745098
|
|
0.78431373 0.80392157 0.66019417 0.81553398]
|
|
|
|
mean value: 0.7991719017704169
|
|
|
|
key: test_roc_auc
|
|
value: [0.70075758 0.70454545 0.69318182 0.5719697 0.69318182 0.65151515
|
|
0.73863636 0.64772727 0.54545455 0.77272727]
|
|
|
|
mean value: 0.6719696969696969
|
|
|
|
key: train_roc_auc
|
|
value: [0.70140872 0.72187322 0.73134399 0.75533029 0.73681706 0.72706073
|
|
0.72710832 0.74662098 0.71359223 0.75242718]
|
|
|
|
mean value: 0.7313582714639254
|
|
|
|
key: test_jcc
|
|
value: [0.5625 0.58823529 0.5 0.44444444 0.5625 0.5
|
|
0.6 0.52941176 0.41176471 0.61538462]
|
|
|
|
mean value: 0.5314240824534943
|
|
|
|
key: train_jcc
|
|
value: [0.60645161 0.57142857 0.60144928 0.65277778 0.59701493 0.58518519
|
|
0.58823529 0.6119403 0.53543307 0.62222222]
|
|
|
|
mean value: 0.5972138233743687
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00916004 0.00893545 0.0089612 0.00898767 0.00895739 0.00898981
|
|
0.00910544 0.00896788 0.00898862 0.00901413]
|
|
|
|
mean value: 0.009006762504577636
|
|
|
|
key: score_time
|
|
value: [0.00864172 0.00866985 0.00862956 0.00853229 0.00861835 0.00858736
|
|
0.00872016 0.00858521 0.00866818 0.00860548]
|
|
|
|
mean value: 0.00862581729888916
|
|
|
|
key: test_mcc
|
|
value: [0.65909298 0.21452908 0.12336594 0.21452908 0.08257228 0.44411739
|
|
0.08257228 0.23262105 0.32539569 0.23570226]
|
|
|
|
mean value: 0.26144980489209724
|
|
|
|
key: train_mcc
|
|
value: [0.431714 0.44379575 0.47690661 0.38794503 0.41929975 0.43858746
|
|
0.45614118 0.4454215 0.40723148 0.39531893]
|
|
|
|
mean value: 0.430236169300088
|
|
|
|
key: test_accuracy
|
|
value: [0.82608696 0.60869565 0.56521739 0.60869565 0.52173913 0.69565217
|
|
0.52173913 0.60869565 0.63636364 0.59090909]
|
|
|
|
mean value: 0.6183794466403162
|
|
|
|
key: train_accuracy
|
|
value: [0.70243902 0.70731707 0.72195122 0.67804878 0.69268293 0.70731707
|
|
0.71707317 0.70731707 0.69417476 0.68446602]
|
|
|
|
mean value: 0.7012787118162443
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.52631579 0.44444444 0.52631579 0.35294118 0.63157895
|
|
0.35294118 0.57142857 0.5 0.4 ]
|
|
|
|
mean value: 0.5105965895129981
|
|
|
|
key: train_fscore
|
|
value: [0.64327485 0.64705882 0.66272189 0.60240964 0.61349693 0.64705882
|
|
0.6627907 0.63855422 0.64 0.61538462]
|
|
|
|
mean value: 0.6372750495347176
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.625 0.57142857 0.625 0.6 0.85714286
|
|
0.6 0.66666667 0.8 0.75 ]
|
|
|
|
mean value: 0.6984126984126984
|
|
|
|
key: train_precision
|
|
value: [0.80882353 0.82089552 0.84848485 0.79365079 0.81967213 0.80882353
|
|
0.81428571 0.828125 0.77777778 0.78787879]
|
|
|
|
mean value: 0.8108417634437052
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.45454545 0.36363636 0.45454545 0.25 0.5
|
|
0.25 0.5 0.36363636 0.27272727]
|
|
|
|
mean value: 0.41363636363636364
|
|
|
|
key: train_recall
|
|
value: [0.53398058 0.53398058 0.54368932 0.48543689 0.49019608 0.53921569
|
|
0.55882353 0.51960784 0.54368932 0.50485437]
|
|
|
|
mean value: 0.5253474205216067
|
|
|
|
key: test_roc_auc
|
|
value: [0.8219697 0.60227273 0.55681818 0.60227273 0.53409091 0.70454545
|
|
0.53409091 0.61363636 0.63636364 0.59090909]
|
|
|
|
mean value: 0.6196969696969696
|
|
|
|
key: train_roc_auc
|
|
value: [0.7032648 0.70816676 0.72282505 0.67899296 0.69169998 0.70650105
|
|
0.71630497 0.70640586 0.69417476 0.68446602]
|
|
|
|
mean value: 0.7012802208261946
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.35714286 0.28571429 0.35714286 0.21428571 0.46153846
|
|
0.21428571 0.4 0.33333333 0.25 ]
|
|
|
|
mean value: 0.354010989010989
|
|
|
|
key: train_jcc
|
|
value: [0.47413793 0.47826087 0.49557522 0.43103448 0.44247788 0.47826087
|
|
0.49565217 0.46902655 0.47058824 0.44444444]
|
|
|
|
mean value: 0.4679458652592843
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00874019 0.00965261 0.0093236 0.00860143 0.00863314 0.00863075
|
|
0.00869918 0.00897694 0.00959516 0.009655 ]
|
|
|
|
mean value: 0.009050798416137696
|
|
|
|
key: score_time
|
|
value: [0.01461792 0.01053357 0.01011968 0.00984621 0.00996614 0.00994396
|
|
0.01022553 0.01024222 0.01070547 0.01071763]
|
|
|
|
mean value: 0.01069183349609375
|
|
|
|
key: test_mcc
|
|
value: [0.12878788 0.31298622 0.02585438 0.12406456 0.44411739 0.2096648
|
|
0.25495628 0.3030303 0.23570226 0. ]
|
|
|
|
mean value: 0.20391640867334052
|
|
|
|
key: train_mcc
|
|
value: [0.53446628 0.51172946 0.55610418 0.52267493 0.48193786 0.50002007
|
|
0.46832513 0.45886299 0.51700551 0.53764186]
|
|
|
|
mean value: 0.5088768274038467
|
|
|
|
key: test_accuracy
|
|
value: [0.56521739 0.65217391 0.52173913 0.56521739 0.69565217 0.56521739
|
|
0.60869565 0.65217391 0.59090909 0.5 ]
|
|
|
|
mean value: 0.591699604743083
|
|
|
|
key: train_accuracy
|
|
value: [0.76585366 0.75121951 0.77073171 0.75609756 0.73658537 0.74634146
|
|
0.72682927 0.72682927 0.75728155 0.76699029]
|
|
|
|
mean value: 0.7504759649538243
|
|
|
|
key: test_fscore
|
|
value: [0.54545455 0.66666667 0.35294118 0.5 0.63157895 0.375
|
|
0.52631579 0.66666667 0.4 0.42105263]
|
|
|
|
mean value: 0.5085676423679519
|
|
|
|
key: train_fscore
|
|
value: [0.75510204 0.72727273 0.7431694 0.7311828 0.70652174 0.72043011
|
|
0.68539326 0.70212766 0.74489796 0.75257732]
|
|
|
|
mean value: 0.7268675006125136
|
|
|
|
key: test_precision
|
|
value: [0.54545455 0.61538462 0.5 0.55555556 0.85714286 0.75
|
|
0.71428571 0.66666667 0.75 0.5 ]
|
|
|
|
mean value: 0.6454489954489955
|
|
|
|
key: train_precision
|
|
value: [0.79569892 0.80952381 0.85 0.81927711 0.79268293 0.79761905
|
|
0.80263158 0.76744186 0.78494624 0.8021978 ]
|
|
|
|
mean value: 0.802201929530647
|
|
|
|
key: test_recall
|
|
value: [0.54545455 0.72727273 0.27272727 0.45454545 0.5 0.25
|
|
0.41666667 0.66666667 0.27272727 0.36363636]
|
|
|
|
mean value: 0.44696969696969696
|
|
|
|
key: train_recall
|
|
value: [0.7184466 0.66019417 0.66019417 0.66019417 0.6372549 0.65686275
|
|
0.59803922 0.64705882 0.70873786 0.70873786]
|
|
|
|
mean value: 0.6655720540643442
|
|
|
|
key: test_roc_auc
|
|
value: [0.56439394 0.65530303 0.51136364 0.56060606 0.70454545 0.57954545
|
|
0.61742424 0.65151515 0.59090909 0.5 ]
|
|
|
|
mean value: 0.593560606060606
|
|
|
|
key: train_roc_auc
|
|
value: [0.76608605 0.75166571 0.77127356 0.75656768 0.73610318 0.7459071
|
|
0.72620407 0.72644203 0.75728155 0.76699029]
|
|
|
|
mean value: 0.7504521225966115
|
|
|
|
key: test_jcc
|
|
value: [0.375 0.5 0.21428571 0.33333333 0.46153846 0.23076923
|
|
0.35714286 0.5 0.25 0.26666667]
|
|
|
|
mean value: 0.3488736263736264
|
|
|
|
key: train_jcc
|
|
value: [0.60655738 0.57142857 0.59130435 0.57627119 0.54621849 0.56302521
|
|
0.52136752 0.54098361 0.59349593 0.60330579]
|
|
|
|
mean value: 0.5713958028231724
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0141499 0.01209903 0.01171756 0.01287889 0.01204228 0.01184416
|
|
0.01186538 0.01188636 0.01186728 0.01376891]
|
|
|
|
mean value: 0.012411975860595703
|
|
|
|
key: score_time
|
|
value: [0.01020265 0.00940251 0.00930929 0.01236463 0.00963902 0.00941229
|
|
0.00974226 0.00945449 0.00942039 0.01031494]
|
|
|
|
mean value: 0.009926247596740722
|
|
|
|
key: test_mcc
|
|
value: [0.58002308 0.6992059 0.38932432 0.21374669 0.5164589 0.58930667
|
|
0.58930667 0.39393939 0.2773501 0.2773501 ]
|
|
|
|
mean value: 0.4526011806094105
|
|
|
|
key: train_mcc
|
|
value: [0.68838106 0.71237056 0.72307355 0.72506339 0.70305132 0.70305132
|
|
0.66368352 0.72693519 0.72423827 0.74069712]
|
|
|
|
mean value: 0.7110545294044077
|
|
|
|
key: test_accuracy
|
|
value: [0.7826087 0.82608696 0.69565217 0.60869565 0.73913043 0.7826087
|
|
0.7826087 0.69565217 0.63636364 0.63636364]
|
|
|
|
mean value: 0.7185770750988142
|
|
|
|
key: train_accuracy
|
|
value: [0.84390244 0.85365854 0.85853659 0.85853659 0.84878049 0.84878049
|
|
0.82926829 0.86341463 0.8592233 0.86893204]
|
|
|
|
mean value: 0.8533033388586313
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.84615385 0.66666667 0.57142857 0.7 0.76190476
|
|
0.76190476 0.69565217 0.6 0.6 ]
|
|
|
|
mean value: 0.6940552887234809
|
|
|
|
key: train_fscore
|
|
value: [0.84158416 0.84536082 0.84974093 0.84816754 0.83769634 0.83769634
|
|
0.81675393 0.86138614 0.84974093 0.86294416]
|
|
|
|
mean value: 0.8451071285619147
|
|
|
|
key: test_precision
|
|
value: [0.875 0.73333333 0.7 0.6 0.875 0.88888889
|
|
0.88888889 0.72727273 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7621717171717172
|
|
|
|
key: train_precision
|
|
value: [0.85858586 0.9010989 0.91111111 0.92045455 0.8988764 0.8988764
|
|
0.87640449 0.87 0.91111111 0.90425532]
|
|
|
|
mean value: 0.895077414988125
|
|
|
|
key: test_recall
|
|
value: [0.63636364 1. 0.63636364 0.54545455 0.58333333 0.66666667
|
|
0.66666667 0.66666667 0.54545455 0.54545455]
|
|
|
|
mean value: 0.6492424242424242
|
|
|
|
key: train_recall
|
|
value: [0.82524272 0.7961165 0.7961165 0.78640777 0.78431373 0.78431373
|
|
0.76470588 0.85294118 0.7961165 0.82524272]
|
|
|
|
mean value: 0.8011517228250523
|
|
|
|
key: test_roc_auc
|
|
value: [0.77651515 0.83333333 0.69318182 0.60606061 0.74621212 0.78787879
|
|
0.78787879 0.6969697 0.63636364 0.63636364]
|
|
|
|
mean value: 0.7200757575757576
|
|
|
|
key: train_roc_auc
|
|
value: [0.84399391 0.85394061 0.85884257 0.85889016 0.84846754 0.84846754
|
|
0.82895488 0.86336379 0.8592233 0.86893204]
|
|
|
|
mean value: 0.8533076337331049
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.73333333 0.5 0.4 0.53846154 0.61538462
|
|
0.61538462 0.53333333 0.42857143 0.42857143]
|
|
|
|
mean value: 0.5376373626373626
|
|
|
|
key: train_jcc
|
|
value: [0.72649573 0.73214286 0.73873874 0.73636364 0.72072072 0.72072072
|
|
0.69026549 0.75652174 0.73873874 0.75892857]
|
|
|
|
mean value: 0.7319636936205809
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.5654161 0.70051932 0.43851113 0.8557241 0.81149411 0.945997
|
|
0.29760575 0.19453859 0.41354036 0.27692008]
|
|
|
|
mean value: 0.5500266551971436
|
|
|
|
key: score_time
|
|
value: [0.01253867 0.01216602 0.01216125 0.01217246 0.01259136 0.01223016
|
|
0.01214933 0.01222849 0.01218319 0.02718925]
|
|
|
|
mean value: 0.013761019706726075
|
|
|
|
key: test_mcc
|
|
value: [0.65151515 0.63327851 0.38932432 0.39727608 0.56818182 0.74242424
|
|
0.63327851 0.12406456 0.47140452 0.23570226]
|
|
|
|
mean value: 0.48464499668963007
|
|
|
|
key: train_mcc
|
|
value: [0.64278523 0.8360404 0.75277897 0.79983884 0.89371934 0.87321531
|
|
0.54046344 0.58230118 0.58157543 0.48196269]
|
|
|
|
mean value: 0.6984680827686014
|
|
|
|
key: test_accuracy
|
|
value: [0.82608696 0.7826087 0.69565217 0.69565217 0.7826087 0.86956522
|
|
0.7826087 0.56521739 0.68181818 0.59090909]
|
|
|
|
mean value: 0.7272727272727273
|
|
|
|
key: train_accuracy
|
|
value: [0.8 0.91707317 0.86829268 0.89756098 0.94634146 0.93658537
|
|
0.76097561 0.78536585 0.75728155 0.69417476]
|
|
|
|
mean value: 0.8363651432630831
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.81481481 0.66666667 0.63157895 0.7826087 0.86956522
|
|
0.73684211 0.61538462 0.75862069 0.68965517]
|
|
|
|
mean value: 0.7383918742791937
|
|
|
|
key: train_fscore
|
|
value: [0.83127572 0.92018779 0.85405405 0.89230769 0.94472362 0.93658537
|
|
0.72316384 0.80357143 0.80314961 0.76404494]
|
|
|
|
mean value: 0.8473064064396472
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.6875 0.7 0.75 0.81818182 0.90909091
|
|
1. 0.57142857 0.61111111 0.55555556]
|
|
|
|
mean value: 0.7421049783549784
|
|
|
|
key: train_precision
|
|
value: [0.72142857 0.89090909 0.96341463 0.94565217 0.96907216 0.93203883
|
|
0.85333333 0.73770492 0.67549669 0.62195122]
|
|
|
|
mean value: 0.8311001629916994
|
|
|
|
key: test_recall
|
|
value: [0.81818182 1. 0.63636364 0.54545455 0.75 0.83333333
|
|
0.58333333 0.66666667 1. 0.90909091]
|
|
|
|
mean value: 0.7742424242424243
|
|
|
|
key: train_recall
|
|
value: [0.98058252 0.95145631 0.76699029 0.84466019 0.92156863 0.94117647
|
|
0.62745098 0.88235294 0.99029126 0.99029126]
|
|
|
|
mean value: 0.8896820864268037
|
|
|
|
key: test_roc_auc
|
|
value: [0.82575758 0.79166667 0.69318182 0.68939394 0.78409091 0.87121212
|
|
0.79166667 0.56060606 0.68181818 0.59090909]
|
|
|
|
mean value: 0.728030303030303
|
|
|
|
key: train_roc_auc
|
|
value: [0.79911479 0.91690463 0.86878926 0.89782029 0.94622121 0.93660765
|
|
0.76032743 0.78583666 0.75728155 0.69417476]
|
|
|
|
mean value: 0.836307824100514
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.6875 0.5 0.46153846 0.64285714 0.76923077
|
|
0.58333333 0.44444444 0.61111111 0.52631579]
|
|
|
|
mean value: 0.5918638744296639
|
|
|
|
key: train_jcc
|
|
value: [0.71126761 0.85217391 0.74528302 0.80555556 0.8952381 0.88073394
|
|
0.56637168 0.67164179 0.67105263 0.61818182]
|
|
|
|
mean value: 0.7417500055514455
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01879621 0.01864338 0.01436448 0.01555657 0.01544523 0.01569748
|
|
0.01556516 0.0156126 0.01505065 0.01566124]
|
|
|
|
mean value: 0.016039299964904784
|
|
|
|
key: score_time
|
|
value: [0.01181293 0.00982785 0.00936031 0.00934577 0.00941539 0.00954008
|
|
0.00944448 0.00944018 0.00936866 0.00942349]
|
|
|
|
mean value: 0.009697914123535156
|
|
|
|
key: test_mcc
|
|
value: [0.76277007 0.41096386 0.48856385 1. 0.76764947 0.83971912
|
|
0.83743579 0.91605722 0.91287093 0.91287093]
|
|
|
|
mean value: 0.7848901253107335
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.86956522 0.69565217 0.73913043 1. 0.86956522 0.91304348
|
|
0.91304348 0.95652174 0.95454545 0.95454545]
|
|
|
|
mean value: 0.8865612648221344
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.84210526 0.72 0.75 1. 0.85714286 0.90909091
|
|
0.92307692 0.96 0.95652174 0.95238095]
|
|
|
|
mean value: 0.8870318643979971
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.64285714 0.69230769 1. 1. 1.
|
|
0.85714286 0.92307692 0.91666667 1. ]
|
|
|
|
mean value: 0.9032051282051282
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.81818182 0.81818182 1. 0.75 0.83333333
|
|
1. 1. 1. 0.90909091]
|
|
|
|
mean value: 0.8856060606060606
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86363636 0.70075758 0.74242424 1. 0.875 0.91666667
|
|
0.90909091 0.95454545 0.95454545 0.95454545]
|
|
|
|
mean value: 0.8871212121212121
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.72727273 0.5625 0.6 1. 0.75 0.83333333
|
|
0.85714286 0.92307692 0.91666667 0.90909091]
|
|
|
|
mean value: 0.8079083416583417
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.10700893 0.10751104 0.10628223 0.1066637 0.10690093 0.10731983
|
|
0.1071713 0.10743928 0.1068995 0.10708857]
|
|
|
|
mean value: 0.10702853202819824
|
|
|
|
key: score_time
|
|
value: [0.01878786 0.01899338 0.01907802 0.0190022 0.01901197 0.0189383
|
|
0.01898313 0.01904893 0.0191102 0.019032 ]
|
|
|
|
mean value: 0.018998599052429198
|
|
|
|
key: test_mcc
|
|
value: [0.66414149 0.6992059 0.48856385 0.39727608 0.41096386 0.65151515
|
|
0.91605722 0.58002308 0.46225016 0.54772256]
|
|
|
|
mean value: 0.5817719350962288
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.82608696 0.82608696 0.73913043 0.69565217 0.69565217 0.82608696
|
|
0.95652174 0.7826087 0.72727273 0.77272727]
|
|
|
|
mean value: 0.7847826086956522
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.83333333 0.84615385 0.75 0.63157895 0.66666667 0.83333333
|
|
0.96 0.81481481 0.7 0.76190476]
|
|
|
|
mean value: 0.7797785703575177
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.76923077 0.73333333 0.69230769 0.75 0.77777778 0.83333333
|
|
0.92307692 0.73333333 0.77777778 0.8 ]
|
|
|
|
mean value: 0.7790170940170941
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.90909091 1. 0.81818182 0.54545455 0.58333333 0.83333333
|
|
1. 0.91666667 0.63636364 0.72727273]
|
|
|
|
mean value: 0.796969696969697
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.82954545 0.83333333 0.74242424 0.68939394 0.70075758 0.82575758
|
|
0.95454545 0.77651515 0.72727273 0.77272727]
|
|
|
|
mean value: 0.7852272727272727
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.71428571 0.73333333 0.6 0.46153846 0.5 0.71428571
|
|
0.92307692 0.6875 0.53846154 0.61538462]
|
|
|
|
mean value: 0.6487866300366301
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01075053 0.01030135 0.00908804 0.00905085 0.00904369 0.01010799
|
|
0.0100894 0.0101018 0.00930357 0.00907087]
|
|
|
|
mean value: 0.009690809249877929
|
|
|
|
key: score_time
|
|
value: [0.01013565 0.00923514 0.00874352 0.00862527 0.00860524 0.00943875
|
|
0.00945067 0.00937891 0.00866175 0.00867438]
|
|
|
|
mean value: 0.009094929695129395
|
|
|
|
key: test_mcc
|
|
value: [ 0.47727273 0.48856385 -0.04545455 0.48075018 0.44411739 -0.03816905
|
|
0.13740858 0.21374669 -0.09759001 -0.18257419]
|
|
|
|
mean value: 0.18780716298837632
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.73913043 0.73913043 0.47826087 0.73913043 0.69565217 0.47826087
|
|
0.56521739 0.60869565 0.45454545 0.40909091]
|
|
|
|
mean value: 0.5907114624505929
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.75 0.45454545 0.7 0.63157895 0.45454545
|
|
0.54545455 0.64 0.33333333 0.43478261]
|
|
|
|
mean value: 0.5671513071215588
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.69230769 0.45454545 0.77777778 0.85714286 0.5
|
|
0.6 0.61538462 0.42857143 0.41666667]
|
|
|
|
mean value: 0.6069669219669219
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.81818182 0.45454545 0.63636364 0.5 0.41666667
|
|
0.5 0.66666667 0.27272727 0.45454545]
|
|
|
|
mean value: 0.5446969696969697
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.73863636 0.74242424 0.47727273 0.73484848 0.70454545 0.48106061
|
|
0.56818182 0.60606061 0.45454545 0.40909091]
|
|
|
|
mean value: 0.5916666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.6 0.29411765 0.53846154 0.46153846 0.29411765
|
|
0.375 0.47058824 0.2 0.27777778]
|
|
|
|
mean value: 0.4083029878618114
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.57
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.32547903 1.41488957 1.40615559 1.33278108 1.30856895 1.30703354
|
|
1.31076121 1.31396508 1.30783343 1.31076026]
|
|
|
|
mean value: 1.333822774887085
|
|
|
|
key: score_time
|
|
value: [0.15590978 0.09691024 0.09611034 0.08798575 0.09535575 0.09329295
|
|
0.08852673 0.09049702 0.09529018 0.08904791]
|
|
|
|
mean value: 0.0988926649093628
|
|
|
|
key: test_mcc
|
|
value: [0.74047959 0.63327851 0.39393939 0.65151515 0.74242424 0.83971912
|
|
0.82575758 0.65909298 0.63636364 0.73029674]
|
|
|
|
mean value: 0.6852866944934071
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.86956522 0.7826087 0.69565217 0.82608696 0.86956522 0.91304348
|
|
0.91304348 0.82608696 0.81818182 0.86363636]
|
|
|
|
mean value: 0.8377470355731225
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.81481481 0.69565217 0.81818182 0.86956522 0.90909091
|
|
0.91666667 0.84615385 0.81818182 0.85714286]
|
|
|
|
mean value: 0.8402592978679935
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9 0.6875 0.66666667 0.81818182 0.90909091 1.
|
|
0.91666667 0.78571429 0.81818182 0.9 ]
|
|
|
|
mean value: 0.8402002164502165
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.81818182 1. 0.72727273 0.81818182 0.83333333 0.83333333
|
|
0.91666667 0.91666667 0.81818182 0.81818182]
|
|
|
|
mean value: 0.85
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86742424 0.79166667 0.6969697 0.82575758 0.87121212 0.91666667
|
|
0.91287879 0.8219697 0.81818182 0.86363636]
|
|
|
|
mean value: 0.8386363636363636
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.6875 0.53333333 0.69230769 0.76923077 0.83333333
|
|
0.84615385 0.73333333 0.69230769 0.75 ]
|
|
|
|
mean value: 0.72875
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.8421793 0.98309755 0.89729023 0.8816216 0.92900515 0.9211638
|
|
0.88418436 0.92679811 0.93264508 0.93030334]
|
|
|
|
mean value: 0.9128288507461548
|
|
|
|
key: score_time
|
|
value: [0.2220726 0.20171928 0.21628141 0.22029281 0.17704153 0.23349524
|
|
0.24436331 0.23651242 0.19798326 0.22944474]
|
|
|
|
mean value: 0.21792066097259521
|
|
|
|
key: test_mcc
|
|
value: [0.65151515 0.63327851 0.48856385 0.65151515 0.56490196 0.83971912
|
|
0.74047959 0.74047959 0.63636364 0.73029674]
|
|
|
|
mean value: 0.6677113300585276
|
|
|
|
key: train_mcc
|
|
value: [0.97077583 0.97077583 0.98067223 0.9516192 0.96116136 0.96116136
|
|
0.95163291 0.9707786 0.95186015 0.97091955]
|
|
|
|
mean value: 0.9641356995103791
|
|
|
|
key: test_accuracy
|
|
value: [0.82608696 0.7826087 0.73913043 0.82608696 0.7826087 0.91304348
|
|
0.86956522 0.86956522 0.81818182 0.86363636]
|
|
|
|
mean value: 0.8290513833992095
|
|
|
|
key: train_accuracy
|
|
value: [0.98536585 0.98536585 0.9902439 0.97560976 0.9804878 0.9804878
|
|
0.97560976 0.98536585 0.97572816 0.98543689]
|
|
|
|
mean value: 0.9819701633909543
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.81481481 0.75 0.81818182 0.8 0.90909091
|
|
0.88 0.88 0.81818182 0.85714286]
|
|
|
|
mean value: 0.8345594035594036
|
|
|
|
key: train_fscore
|
|
value: [0.98550725 0.98550725 0.99038462 0.97607656 0.98058252 0.98058252
|
|
0.97584541 0.98536585 0.97607656 0.98550725]
|
|
|
|
mean value: 0.9821435777393142
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.6875 0.69230769 0.81818182 0.76923077 1.
|
|
0.84615385 0.84615385 0.81818182 0.9 ]
|
|
|
|
mean value: 0.8195891608391609
|
|
|
|
key: train_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.98076923 0.98076923 0.98095238 0.96226415 0.97115385 0.97115385
|
|
0.96190476 0.98058252 0.96226415 0.98076923]
|
|
|
|
mean value: 0.9732583353631165
|
|
|
|
key: test_recall
|
|
value: [0.81818182 1. 0.81818182 0.81818182 0.83333333 0.83333333
|
|
0.91666667 0.91666667 0.81818182 0.81818182]
|
|
|
|
mean value: 0.8590909090909091
|
|
|
|
key: train_recall
|
|
value: [0.99029126 0.99029126 1. 0.99029126 0.99019608 0.99019608
|
|
0.99019608 0.99019608 0.99029126 0.99029126]
|
|
|
|
mean value: 0.9912240624405102
|
|
|
|
key: test_roc_auc
|
|
value: [0.82575758 0.79166667 0.74242424 0.82575758 0.78030303 0.91666667
|
|
0.86742424 0.86742424 0.81818182 0.86363636]
|
|
|
|
mean value: 0.8299242424242425
|
|
|
|
key: train_roc_auc
|
|
value: [0.98534171 0.98534171 0.99019608 0.97553779 0.98053493 0.98053493
|
|
0.97568056 0.9853893 0.97572816 0.98543689]
|
|
|
|
mean value: 0.9819722063582714
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.6875 0.6 0.69230769 0.66666667 0.83333333
|
|
0.78571429 0.78571429 0.69230769 0.75 ]
|
|
|
|
mean value: 0.7185851648351649
|
|
|
|
key: train_jcc
|
|
value: [0.97142857 0.97142857 0.98095238 0.95327103 0.96190476 0.96190476
|
|
0.95283019 0.97115385 0.95327103 0.97142857]
|
|
|
|
mean value: 0.9649573709955477
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02420473 0.01008201 0.01005197 0.01018906 0.01016712 0.01026821
|
|
0.01024914 0.01026487 0.01008368 0.0101018 ]
|
|
|
|
mean value: 0.011566257476806641
|
|
|
|
key: score_time
|
|
value: [0.01045871 0.00948524 0.00967598 0.00949144 0.00947976 0.0094893
|
|
0.00947714 0.00952911 0.00955606 0.00959349]
|
|
|
|
mean value: 0.00962362289428711
|
|
|
|
key: test_mcc
|
|
value: [0.65909298 0.21452908 0.12336594 0.21452908 0.08257228 0.44411739
|
|
0.08257228 0.23262105 0.32539569 0.23570226]
|
|
|
|
mean value: 0.26144980489209724
|
|
|
|
key: train_mcc
|
|
value: [0.431714 0.44379575 0.47690661 0.38794503 0.41929975 0.43858746
|
|
0.45614118 0.4454215 0.40723148 0.39531893]
|
|
|
|
mean value: 0.430236169300088
|
|
|
|
key: test_accuracy
|
|
value: [0.82608696 0.60869565 0.56521739 0.60869565 0.52173913 0.69565217
|
|
0.52173913 0.60869565 0.63636364 0.59090909]
|
|
|
|
mean value: 0.6183794466403162
|
|
|
|
key: train_accuracy
|
|
value: [0.70243902 0.70731707 0.72195122 0.67804878 0.69268293 0.70731707
|
|
0.71707317 0.70731707 0.69417476 0.68446602]
|
|
|
|
mean value: 0.7012787118162443
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.52631579 0.44444444 0.52631579 0.35294118 0.63157895
|
|
0.35294118 0.57142857 0.5 0.4 ]
|
|
|
|
mean value: 0.5105965895129981
|
|
|
|
key: train_fscore
|
|
value: [0.64327485 0.64705882 0.66272189 0.60240964 0.61349693 0.64705882
|
|
0.6627907 0.63855422 0.64 0.61538462]
|
|
|
|
mean value: 0.6372750495347176
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.625 0.57142857 0.625 0.6 0.85714286
|
|
0.6 0.66666667 0.8 0.75 ]
|
|
|
|
mean value: 0.6984126984126984
|
|
|
|
key: train_precision
|
|
value: [0.80882353 0.82089552 0.84848485 0.79365079 0.81967213 0.80882353
|
|
0.81428571 0.828125 0.77777778 0.78787879]
|
|
|
|
mean value: 0.8108417634437052
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.45454545 0.36363636 0.45454545 0.25 0.5
|
|
0.25 0.5 0.36363636 0.27272727]
|
|
|
|
mean value: 0.41363636363636364
|
|
|
|
key: train_recall
|
|
value: [0.53398058 0.53398058 0.54368932 0.48543689 0.49019608 0.53921569
|
|
0.55882353 0.51960784 0.54368932 0.50485437]
|
|
|
|
mean value: 0.5253474205216067
|
|
|
|
key: test_roc_auc
|
|
value: [0.8219697 0.60227273 0.55681818 0.60227273 0.53409091 0.70454545
|
|
0.53409091 0.61363636 0.63636364 0.59090909]
|
|
|
|
mean value: 0.6196969696969696
|
|
|
|
key: train_roc_auc
|
|
value: [0.7032648 0.70816676 0.72282505 0.67899296 0.69169998 0.70650105
|
|
0.71630497 0.70640586 0.69417476 0.68446602]
|
|
|
|
mean value: 0.7012802208261946
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.35714286 0.28571429 0.35714286 0.21428571 0.46153846
|
|
0.21428571 0.4 0.33333333 0.25 ]
|
|
|
|
mean value: 0.354010989010989
|
|
|
|
key: train_jcc
|
|
value: [0.47413793 0.47826087 0.49557522 0.43103448 0.44247788 0.47826087
|
|
0.49565217 0.46902655 0.47058824 0.44444444]
|
|
|
|
mean value: 0.4679458652592843
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.2580533 0.05313063 0.05868006 0.05744362 0.05369687 0.06253695
|
|
0.06104183 0.06061697 0.06067872 0.07018757]
|
|
|
|
mean value: 0.07960665225982666
|
|
|
|
key: score_time
|
|
value: [0.01125717 0.01169109 0.01044297 0.01053381 0.01151872 0.0110507
|
|
0.01123476 0.01140285 0.01065278 0.01143217]
|
|
|
|
mean value: 0.011121702194213868
|
|
|
|
key: test_mcc
|
|
value: [0.58002308 0.58930667 0.66414149 1. 0.74242424 0.83971912
|
|
0.83743579 1. 1. 1. ]
|
|
|
|
mean value: 0.8253050384253398
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.7826087 0.7826087 0.82608696 1. 0.86956522 0.91304348
|
|
0.91304348 1. 1. 1. ]
|
|
|
|
mean value: 0.908695652173913
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.8 0.83333333 1. 0.86956522 0.90909091
|
|
0.92307692 1. 1. 1. ]
|
|
|
|
mean value: 0.9071908488155628
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.875 0.71428571 0.76923077 1. 0.90909091 1.
|
|
0.85714286 1. 1. 1. ]
|
|
|
|
mean value: 0.912475024975025
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.90909091 0.90909091 1. 0.83333333 0.83333333
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9121212121212121
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.77651515 0.78787879 0.82954545 1. 0.87121212 0.91666667
|
|
0.90909091 1. 1. 1. ]
|
|
|
|
mean value: 0.9090909090909091
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.66666667 0.71428571 1. 0.76923077 0.83333333
|
|
0.85714286 1. 1. 1. ]
|
|
|
|
mean value: 0.8423992673992674
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02735448 0.02864647 0.0365417 0.02909827 0.02844095 0.02818394
|
|
0.05325174 0.04603481 0.02615213 0.02718377]
|
|
|
|
mean value: 0.03308882713317871
|
|
|
|
key: score_time
|
|
value: [0.01265574 0.0118897 0.01186728 0.01191807 0.01186371 0.01190019
|
|
0.02155924 0.01197457 0.01198792 0.0118432 ]
|
|
|
|
mean value: 0.012945961952209473
|
|
|
|
key: test_mcc
|
|
value: [0.48075018 0.65909298 0.76764947 0.56490196 0.58930667 0.65909298
|
|
0.65151515 0.58930667 0.81818182 0.68313005]
|
|
|
|
mean value: 0.6462927922925813
|
|
|
|
key: train_mcc
|
|
value: [0.93174679 0.96116136 0.95126131 0.94146202 0.9707786 0.96116136
|
|
0.96116136 0.96097468 0.93208276 0.94192516]
|
|
|
|
mean value: 0.9513715399106522
|
|
|
|
key: test_accuracy
|
|
value: [0.73913043 0.82608696 0.86956522 0.7826087 0.7826087 0.82608696
|
|
0.82608696 0.7826087 0.90909091 0.81818182]
|
|
|
|
mean value: 0.8162055335968379
|
|
|
|
key: train_accuracy
|
|
value: [0.96585366 0.9804878 0.97560976 0.97073171 0.98536585 0.9804878
|
|
0.9804878 0.9804878 0.96601942 0.97087379]
|
|
|
|
mean value: 0.9756405399005447
|
|
|
|
key: test_fscore
|
|
value: [0.7 0.8 0.88 0.76190476 0.76190476 0.84615385
|
|
0.83333333 0.76190476 0.90909091 0.77777778]
|
|
|
|
mean value: 0.8032070152070152
|
|
|
|
key: train_fscore
|
|
value: [0.96618357 0.98039216 0.97584541 0.97087379 0.98536585 0.98058252
|
|
0.98058252 0.98039216 0.96618357 0.97115385]
|
|
|
|
mean value: 0.9757555408875802
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.88888889 0.78571429 0.8 0.88888889 0.78571429
|
|
0.83333333 0.88888889 0.90909091 1. ]
|
|
|
|
mean value: 0.8558297258297258
|
|
|
|
key: train_precision
|
|
value: [0.96153846 0.99009901 0.97115385 0.97087379 0.98058252 0.97115385
|
|
0.97115385 0.98039216 0.96153846 0.96190476]
|
|
|
|
mean value: 0.972039070088657
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.72727273 1. 0.72727273 0.66666667 0.91666667
|
|
0.83333333 0.66666667 0.90909091 0.63636364]
|
|
|
|
mean value: 0.771969696969697
|
|
|
|
key: train_recall
|
|
value: [0.97087379 0.97087379 0.98058252 0.97087379 0.99019608 0.99019608
|
|
0.99019608 0.98039216 0.97087379 0.98058252]
|
|
|
|
mean value: 0.979564058633162
|
|
|
|
key: test_roc_auc
|
|
value: [0.73484848 0.8219697 0.875 0.78030303 0.78787879 0.8219697
|
|
0.82575758 0.78787879 0.90909091 0.81818182]
|
|
|
|
mean value: 0.8162878787878787
|
|
|
|
key: train_roc_auc
|
|
value: [0.96582905 0.98053493 0.97558538 0.97073101 0.9853893 0.98053493
|
|
0.98053493 0.98048734 0.96601942 0.97087379]
|
|
|
|
mean value: 0.9756520083761661
|
|
|
|
key: test_jcc
|
|
value: [0.53846154 0.66666667 0.78571429 0.61538462 0.61538462 0.73333333
|
|
0.71428571 0.61538462 0.83333333 0.63636364]
|
|
|
|
mean value: 0.6754312354312354
|
|
|
|
key: train_jcc
|
|
value: [0.93457944 0.96153846 0.95283019 0.94339623 0.97115385 0.96190476
|
|
0.96190476 0.96153846 0.93457944 0.94392523]
|
|
|
|
mean value: 0.9527350820284165
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02102685 0.0097661 0.00906491 0.00905061 0.008991 0.00937319
|
|
0.0090394 0.00941396 0.00884175 0.00902915]
|
|
|
|
mean value: 0.010359692573547363
|
|
|
|
key: score_time
|
|
value: [0.00948429 0.00991464 0.00870991 0.00855541 0.0086112 0.00870061
|
|
0.00849533 0.00886989 0.00867701 0.00849438]
|
|
|
|
mean value: 0.008851265907287598
|
|
|
|
key: test_mcc
|
|
value: [0.38932432 0.23262105 0.3030303 0.12878788 0.39393939 0.21374669
|
|
0.5164589 0.21969697 0.09245003 0.37796447]
|
|
|
|
mean value: 0.2868020012621178
|
|
|
|
key: train_mcc
|
|
value: [0.35608875 0.3660859 0.37560698 0.42436935 0.3755949 0.35621133
|
|
0.3658258 0.41462022 0.43763636 0.40824829]
|
|
|
|
mean value: 0.3880287875437212
|
|
|
|
key: test_accuracy
|
|
value: [0.69565217 0.60869565 0.65217391 0.56521739 0.69565217 0.60869565
|
|
0.73913043 0.60869565 0.54545455 0.68181818]
|
|
|
|
mean value: 0.6401185770750988
|
|
|
|
key: train_accuracy
|
|
value: [0.67804878 0.68292683 0.68780488 0.71219512 0.68780488 0.67804878
|
|
0.68292683 0.70731707 0.7184466 0.7038835 ]
|
|
|
|
mean value: 0.6939403267819086
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.64 0.63636364 0.54545455 0.69565217 0.64
|
|
0.7 0.60869565 0.58333333 0.63157895]
|
|
|
|
mean value: 0.634774495527356
|
|
|
|
key: train_fscore
|
|
value: [0.68269231 0.67980296 0.69230769 0.71497585 0.68627451 0.67961165
|
|
0.67980296 0.70588235 0.72641509 0.71090047]
|
|
|
|
mean value: 0.6958665838244484
|
|
|
|
key: test_precision
|
|
value: [0.7 0.57142857 0.63636364 0.54545455 0.72727273 0.61538462
|
|
0.875 0.63636364 0.53846154 0.75 ]
|
|
|
|
mean value: 0.659572927072927
|
|
|
|
key: train_precision
|
|
value: [0.67619048 0.69 0.68571429 0.71153846 0.68627451 0.67307692
|
|
0.68316832 0.70588235 0.70642202 0.69444444]
|
|
|
|
mean value: 0.6912711788889996
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.72727273 0.63636364 0.54545455 0.66666667 0.66666667
|
|
0.58333333 0.58333333 0.63636364 0.54545455]
|
|
|
|
mean value: 0.6227272727272727
|
|
|
|
key: train_recall
|
|
value: [0.68932039 0.66990291 0.69902913 0.7184466 0.68627451 0.68627451
|
|
0.67647059 0.70588235 0.74757282 0.72815534]
|
|
|
|
mean value: 0.7007329145250334
|
|
|
|
key: test_roc_auc
|
|
value: [0.69318182 0.61363636 0.65151515 0.56439394 0.6969697 0.60606061
|
|
0.74621212 0.60984848 0.54545455 0.68181818]
|
|
|
|
mean value: 0.6409090909090909
|
|
|
|
key: train_roc_auc
|
|
value: [0.67799353 0.68299067 0.68774986 0.71216448 0.68779745 0.67808871
|
|
0.68289549 0.70731011 0.7184466 0.7038835 ]
|
|
|
|
mean value: 0.6939320388349515
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.47058824 0.46666667 0.375 0.53333333 0.47058824
|
|
0.53846154 0.4375 0.41176471 0.46153846]
|
|
|
|
mean value: 0.46654411764705883
|
|
|
|
key: train_jcc
|
|
value: [0.51824818 0.51492537 0.52941176 0.55639098 0.52238806 0.51470588
|
|
0.51492537 0.54545455 0.57037037 0.55147059]
|
|
|
|
mean value: 0.5338291109715274
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01177096 0.01734757 0.01680732 0.01921105 0.01541638 0.01629424
|
|
0.01723695 0.01650763 0.01626301 0.01551557]
|
|
|
|
mean value: 0.01623706817626953
|
|
|
|
key: score_time
|
|
value: [0.00875068 0.01169848 0.01196933 0.01190495 0.01174927 0.01198626
|
|
0.01195526 0.01173544 0.01152921 0.01165438]
|
|
|
|
mean value: 0.011493325233459473
|
|
|
|
key: test_mcc
|
|
value: [0.91666667 0.5164589 0.41096386 0.74242424 0.56818182 0.6992059
|
|
0.58930667 0.74242424 0.54232614 0.36514837]
|
|
|
|
mean value: 0.6093106813380207
|
|
|
|
key: train_mcc
|
|
value: [0.80613459 0.94164684 0.91429989 0.94163576 0.88310329 0.82593778
|
|
0.67701604 0.91224062 0.79681907 0.89527379]
|
|
|
|
mean value: 0.8594107664325119
|
|
|
|
key: test_accuracy
|
|
value: [0.95652174 0.73913043 0.69565217 0.86956522 0.7826087 0.82608696
|
|
0.7826087 0.86956522 0.72727273 0.68181818]
|
|
|
|
mean value: 0.7930830039525691
|
|
|
|
key: train_accuracy
|
|
value: [0.89756098 0.97073171 0.95609756 0.97073171 0.94146341 0.90731707
|
|
0.81463415 0.95609756 0.88834951 0.94660194]
|
|
|
|
mean value: 0.9249585602652143
|
|
|
|
key: test_fscore
|
|
value: [0.95652174 0.76923077 0.72 0.86956522 0.7826087 0.8
|
|
0.76190476 0.86956522 0.78571429 0.66666667]
|
|
|
|
mean value: 0.79817773530817
|
|
|
|
key: train_fscore
|
|
value: [0.9058296 0.97058824 0.95774648 0.97115385 0.94174757 0.89839572
|
|
0.77108434 0.95609756 0.89956332 0.94835681]
|
|
|
|
mean value: 0.9220563476088464
|
|
|
|
key: test_precision
|
|
value: [0.91666667 0.66666667 0.64285714 0.83333333 0.81818182 1.
|
|
0.88888889 0.90909091 0.64705882 0.7 ]
|
|
|
|
mean value: 0.8022744249214837
|
|
|
|
key: train_precision
|
|
value: [0.84166667 0.98019802 0.92727273 0.96190476 0.93269231 0.98823529
|
|
1. 0.95145631 0.81746032 0.91818182]
|
|
|
|
mean value: 0.9319068223777838
|
|
|
|
key: test_recall
|
|
value: [1. 0.90909091 0.81818182 0.90909091 0.75 0.66666667
|
|
0.66666667 0.83333333 1. 0.63636364]
|
|
|
|
mean value: 0.818939393939394
|
|
|
|
key: train_recall
|
|
value: [0.98058252 0.96116505 0.99029126 0.98058252 0.95098039 0.82352941
|
|
0.62745098 0.96078431 1. 0.98058252]
|
|
|
|
mean value: 0.9255948981534361
|
|
|
|
key: test_roc_auc
|
|
value: [0.95833333 0.74621212 0.70075758 0.87121212 0.78409091 0.83333333
|
|
0.78787879 0.87121212 0.72727273 0.68181818]
|
|
|
|
mean value: 0.7962121212121211
|
|
|
|
key: train_roc_auc
|
|
value: [0.89715401 0.9707786 0.95592994 0.97068342 0.94150961 0.90691034
|
|
0.81372549 0.95612031 0.88834951 0.94660194]
|
|
|
|
mean value: 0.9247763182943081
|
|
|
|
key: test_jcc
|
|
value: [0.91666667 0.625 0.5625 0.76923077 0.64285714 0.66666667
|
|
0.61538462 0.76923077 0.64705882 0.5 ]
|
|
|
|
mean value: 0.6714595453566042
|
|
|
|
key: train_jcc
|
|
value: [0.82786885 0.94285714 0.91891892 0.94392523 0.88990826 0.81553398
|
|
0.62745098 0.91588785 0.81746032 0.90178571]
|
|
|
|
mean value: 0.8601597247948675
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01530957 0.01498342 0.01542163 0.01644206 0.01473165 0.01477385
|
|
0.01383638 0.0155046 0.01537108 0.01408386]
|
|
|
|
mean value: 0.015045809745788574
|
|
|
|
key: score_time
|
|
value: [0.01238942 0.01142144 0.01168561 0.01138973 0.01165462 0.01171398
|
|
0.01134014 0.01167512 0.01169944 0.01163602]
|
|
|
|
mean value: 0.011660552024841309
|
|
|
|
key: test_mcc
|
|
value: [0.32232919 0.41096386 0.3030303 0.56490196 0.76277007 0.82575758
|
|
0.40451992 0.66414149 0.40824829 0.40824829]
|
|
|
|
mean value: 0.5074910937104659
|
|
|
|
key: train_mcc
|
|
value: [0.47469541 0.90259929 0.9707786 0.92479811 0.86761151 0.86052253
|
|
0.37926401 0.803912 0.61850654 0.82977382]
|
|
|
|
mean value: 0.7632461822687774
|
|
|
|
key: test_accuracy
|
|
value: [0.60869565 0.69565217 0.65217391 0.7826087 0.86956522 0.91304348
|
|
0.65217391 0.82608696 0.68181818 0.68181818]
|
|
|
|
mean value: 0.7363636363636363
|
|
|
|
key: train_accuracy
|
|
value: [0.68292683 0.95121951 0.98536585 0.96097561 0.93170732 0.92682927
|
|
0.62439024 0.89268293 0.77669903 0.90776699]
|
|
|
|
mean value: 0.8640563580393086
|
|
|
|
key: test_fscore
|
|
value: [0.30769231 0.72 0.63636364 0.76190476 0.88888889 0.91666667
|
|
0.75 0.81818182 0.74074074 0.58823529]
|
|
|
|
mean value: 0.7128674114556468
|
|
|
|
key: train_fscore
|
|
value: [0.53900709 0.95192308 0.98536585 0.95959596 0.93457944 0.92146597
|
|
0.72597865 0.87912088 0.81746032 0.89839572]
|
|
|
|
mean value: 0.8612892956408041
|
|
|
|
key: test_precision
|
|
value: [1. 0.64285714 0.63636364 0.8 0.8 0.91666667
|
|
0.6 0.9 0.625 0.83333333]
|
|
|
|
mean value: 0.775422077922078
|
|
|
|
key: train_precision
|
|
value: [1. 0.94285714 0.99019608 1. 0.89285714 0.98876404
|
|
0.5698324 1. 0.69127517 1. ]
|
|
|
|
mean value: 0.907578197910935
|
|
|
|
key: test_recall
|
|
value: [0.18181818 0.81818182 0.63636364 0.72727273 1. 0.91666667
|
|
1. 0.75 0.90909091 0.45454545]
|
|
|
|
mean value: 0.7393939393939394
|
|
|
|
key: train_recall
|
|
value: [0.36893204 0.96116505 0.98058252 0.9223301 0.98039216 0.8627451
|
|
1. 0.78431373 1. 0.81553398]
|
|
|
|
mean value: 0.8675994669712546
|
|
|
|
key: test_roc_auc
|
|
value: [0.59090909 0.70075758 0.65151515 0.78030303 0.86363636 0.91287879
|
|
0.63636364 0.82954545 0.68181818 0.68181818]
|
|
|
|
mean value: 0.7329545454545454
|
|
|
|
key: train_roc_auc
|
|
value: [0.68446602 0.95117076 0.9853893 0.96116505 0.93194365 0.92651818
|
|
0.62621359 0.89215686 0.77669903 0.90776699]
|
|
|
|
mean value: 0.8643489434608795
|
|
|
|
key: test_jcc
|
|
value: [0.18181818 0.5625 0.46666667 0.61538462 0.8 0.84615385
|
|
0.6 0.69230769 0.58823529 0.41666667]
|
|
|
|
mean value: 0.5769732963115316
|
|
|
|
key: train_jcc
|
|
value: [0.36893204 0.90825688 0.97115385 0.9223301 0.87719298 0.85436893
|
|
0.5698324 0.78431373 0.69127517 0.81553398]
|
|
|
|
mean value: 0.7763190053397688
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.14774132 0.12597418 0.12698054 0.12672639 0.12538242 0.1247561
|
|
0.12422967 0.12333274 0.12338328 0.1225481 ]
|
|
|
|
mean value: 0.1271054744720459
|
|
|
|
key: score_time
|
|
value: [0.01492047 0.01496315 0.0151732 0.01502323 0.01556945 0.01488686
|
|
0.01481652 0.01488638 0.0149827 0.01499629]
|
|
|
|
mean value: 0.015021824836730957
|
|
|
|
key: test_mcc
|
|
value: [0.91605722 0.58930667 0.66414149 1. 0.66414149 0.91666667
|
|
0.76277007 0.82575758 1. 1. ]
|
|
|
|
mean value: 0.8338841181236702
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.95652174 0.7826087 0.82608696 1. 0.82608696 0.95652174
|
|
0.86956522 0.91304348 1. 1. ]
|
|
|
|
mean value: 0.9130434782608696
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95238095 0.8 0.83333333 1. 0.81818182 0.95652174
|
|
0.88888889 0.91666667 1. 1. ]
|
|
|
|
mean value: 0.9165973398582095
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.71428571 0.76923077 1. 0.9 1.
|
|
0.8 0.91666667 1. 1. ]
|
|
|
|
mean value: 0.910018315018315
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.90909091 0.90909091 1. 0.75 0.91666667
|
|
1. 0.91666667 1. 1. ]
|
|
|
|
mean value: 0.931060606060606
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95454545 0.78787879 0.82954545 1. 0.82954545 0.95833333
|
|
0.86363636 0.91287879 1. 1. ]
|
|
|
|
mean value: 0.9136363636363636
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.90909091 0.66666667 0.71428571 1. 0.69230769 0.91666667
|
|
0.8 0.84615385 1. 1. ]
|
|
|
|
mean value: 0.8545171495171495
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04642153 0.04462147 0.04027104 0.04553676 0.04686022 0.05081582
|
|
0.03903246 0.04629207 0.04119587 0.04431295]
|
|
|
|
mean value: 0.04453601837158203
|
|
|
|
key: score_time
|
|
value: [0.01880813 0.02288532 0.02414727 0.0243566 0.02825975 0.02347636
|
|
0.02017665 0.02507305 0.02807355 0.02321911]
|
|
|
|
mean value: 0.023847579956054688
|
|
|
|
key: test_mcc
|
|
value: [0.50168817 0.58930667 0.56818182 1. 0.76764947 0.74242424
|
|
0.83743579 0.91666667 0.91287093 0.81818182]
|
|
|
|
mean value: 0.7654405571559706
|
|
|
|
key: train_mcc
|
|
value: [1. 0.98048734 0.98067587 1. 0.99029034 0.98067223
|
|
0.99029034 0.97114302 0.97128586 0.99033794]
|
|
|
|
mean value: 0.9855182943104235
|
|
|
|
key: test_accuracy
|
|
value: [0.73913043 0.7826087 0.7826087 1. 0.86956522 0.86956522
|
|
0.91304348 0.95652174 0.95454545 0.90909091]
|
|
|
|
mean value: 0.8776679841897234
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.9902439 0.9902439 1. 0.99512195 0.9902439
|
|
0.99512195 0.98536585 0.98543689 0.99514563]
|
|
|
|
mean value: 0.9926923987686479
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.8 0.7826087 1. 0.85714286 0.86956522
|
|
0.92307692 0.95652174 0.95238095 0.90909091]
|
|
|
|
mean value: 0.8717053960532221
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99029126 0.99019608 1. 0.99507389 0.99009901
|
|
0.99507389 0.98507463 0.98522167 0.99512195]
|
|
|
|
mean value: 0.9926152386681547
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.71428571 0.75 1. 1. 0.90909091
|
|
0.85714286 1. 1. 0.90909091]
|
|
|
|
mean value: 0.8996753246753246
|
|
|
|
key: train_precision
|
|
value: [1. 0.99029126 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9990291262135922
|
|
|
|
key: test_recall
|
|
value: [0.54545455 0.90909091 0.81818182 1. 0.75 0.83333333
|
|
1. 0.91666667 0.90909091 0.90909091]
|
|
|
|
mean value: 0.8590909090909091
|
|
|
|
key: train_recall
|
|
value: [1. 0.99029126 0.98058252 1. 0.99019608 0.98039216
|
|
0.99019608 0.97058824 0.97087379 0.99029126]
|
|
|
|
mean value: 0.9863411383971065
|
|
|
|
key: test_roc_auc
|
|
value: [0.73106061 0.78787879 0.78409091 1. 0.875 0.87121212
|
|
0.90909091 0.95833333 0.95454545 0.90909091]
|
|
|
|
mean value: 0.878030303030303
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99024367 0.99029126 1. 0.99509804 0.99019608
|
|
0.99509804 0.98529412 0.98543689 0.99514563]
|
|
|
|
mean value: 0.9926803731201218
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.66666667 0.64285714 1. 0.75 0.76923077
|
|
0.85714286 0.91666667 0.90909091 0.83333333]
|
|
|
|
mean value: 0.7844988344988345
|
|
|
|
key: train_jcc
|
|
value: [1. 0.98076923 0.98058252 1. 0.99019608 0.98039216
|
|
0.99019608 0.97058824 0.97087379 0.99029126]
|
|
|
|
mean value: 0.9853889352604372
|
|
|
|
MCC on Blind test: 0.03
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.045012 0.04318142 0.0282228 0.02828217 0.0310216 0.08236885
|
|
0.0660882 0.06425071 0.07074213 0.07848597]
|
|
|
|
mean value: 0.05376558303833008
|
|
|
|
key: score_time
|
|
value: [0.02342558 0.01257706 0.01256967 0.01259303 0.02418852 0.02631974
|
|
0.02399254 0.02026963 0.02379179 0.02296281]
|
|
|
|
mean value: 0.020269036293029785
|
|
|
|
key: test_mcc
|
|
value: [0.47727273 0.48856385 0.31252706 0.03816905 0.5164589 0.66414149
|
|
0.5164589 0.38932432 0.29277002 0.18898224]
|
|
|
|
mean value: 0.3884668558613338
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99033794]
|
|
|
|
mean value: 0.9990337937660287
|
|
|
|
key: test_accuracy
|
|
value: [0.73913043 0.73913043 0.65217391 0.52173913 0.73913043 0.82608696
|
|
0.73913043 0.69565217 0.63636364 0.59090909]
|
|
|
|
mean value: 0.6879446640316206
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99514563]
|
|
|
|
mean value: 0.9995145631067961
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.75 0.55555556 0.47619048 0.7 0.81818182
|
|
0.7 0.72 0.55555556 0.52631579]
|
|
|
|
mean value: 0.6529071922229817
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99512195]
|
|
|
|
mean value: 0.9995121951219512
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.69230769 0.71428571 0.5 0.875 0.9
|
|
0.875 0.69230769 0.71428571 0.625 ]
|
|
|
|
mean value: 0.731545954045954
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.81818182 0.45454545 0.45454545 0.58333333 0.75
|
|
0.58333333 0.75 0.45454545 0.45454545]
|
|
|
|
mean value: 0.603030303030303
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99029126]
|
|
|
|
mean value: 0.9990291262135922
|
|
|
|
key: test_roc_auc
|
|
value: [0.73863636 0.74242424 0.64393939 0.51893939 0.74621212 0.82954545
|
|
0.74621212 0.69318182 0.63636364 0.59090909]
|
|
|
|
mean value: 0.6886363636363636
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99514563]
|
|
|
|
mean value: 0.9995145631067961
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.6 0.38461538 0.3125 0.53846154 0.69230769
|
|
0.53846154 0.5625 0.38461538 0.35714286]
|
|
|
|
mean value: 0.4942032967032967
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99029126]
|
|
|
|
mean value: 0.9990291262135922
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.43832111 0.41460967 0.41730499 0.418607 0.41266894 0.41043901
|
|
0.41033554 0.40992641 0.41230726 0.40911317]
|
|
|
|
mean value: 0.4153633117675781
|
|
|
|
key: score_time
|
|
value: [0.00984025 0.00928712 0.00983596 0.00985432 0.00924468 0.0091536
|
|
0.00908923 0.00908732 0.00944901 0.00996041]
|
|
|
|
mean value: 0.00948019027709961
|
|
|
|
key: test_mcc
|
|
value: [0.74047959 0.5164589 0.48856385 1. 0.76764947 0.91666667
|
|
0.76277007 1. 1. 1. ]
|
|
|
|
mean value: 0.8192588557461732
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.86956522 0.73913043 0.73913043 1. 0.86956522 0.95652174
|
|
0.86956522 1. 1. 1. ]
|
|
|
|
mean value: 0.9043478260869565
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.76923077 0.75 1. 0.85714286 0.95652174
|
|
0.88888889 1. 1. 1. ]
|
|
|
|
mean value: 0.9078927111535807
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9 0.66666667 0.69230769 1. 1. 1.
|
|
0.8 1. 1. 1. ]
|
|
|
|
mean value: 0.9058974358974359
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.90909091 0.81818182 1. 0.75 0.91666667
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9212121212121213
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86742424 0.74621212 0.74242424 1. 0.875 0.95833333
|
|
0.86363636 1. 1. 1. ]
|
|
|
|
mean value: 0.9053030303030303
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.625 0.6 1. 0.75 0.91666667
|
|
0.8 1. 1. 1. ]
|
|
|
|
mean value: 0.8441666666666666
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.53
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02009869 0.02192163 0.02121377 0.02089286 0.03631091 0.02019191
|
|
0.0366838 0.02073383 0.03662992 0.03478622]
|
|
|
|
mean value: 0.026946353912353515
|
|
|
|
key: score_time
|
|
value: [0.01233768 0.01223588 0.01664376 0.01710796 0.01231337 0.01765871
|
|
0.01238084 0.017627 0.01230049 0.01220369]
|
|
|
|
mean value: 0.014280939102172851
|
|
|
|
key: test_mcc
|
|
value: [0.47727273 0.83971912 0.23262105 0.56818182 0.91605722 0.62050523
|
|
0.91605722 0.65909298 0.63636364 0.64715023]
|
|
|
|
mean value: 0.6513021246123849
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.73913043 0.91304348 0.60869565 0.7826087 0.95652174 0.7826087
|
|
0.95652174 0.82608696 0.81818182 0.81818182]
|
|
|
|
mean value: 0.8201581027667985
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.91666667 0.64 0.7826087 0.96 0.82758621
|
|
0.96 0.84615385 0.81818182 0.83333333]
|
|
|
|
mean value: 0.8311803294157117
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.84615385 0.57142857 0.75 0.92307692 0.70588235
|
|
0.92307692 0.78571429 0.81818182 0.76923077]
|
|
|
|
mean value: 0.782001821707704
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.72727273 1. 0.72727273 0.81818182 1. 1.
|
|
1. 0.91666667 0.81818182 0.90909091]
|
|
|
|
mean value: 0.8916666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.73863636 0.91666667 0.61363636 0.78409091 0.95454545 0.77272727
|
|
0.95454545 0.8219697 0.81818182 0.81818182]
|
|
|
|
mean value: 0.8193181818181818
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.84615385 0.47058824 0.64285714 0.92307692 0.70588235
|
|
0.92307692 0.73333333 0.69230769 0.71428571]
|
|
|
|
mean value: 0.7222990734755441
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02222586 0.02766562 0.0338943 0.02159619 0.03423214 0.03410792
|
|
0.03486061 0.03411412 0.03433919 0.03421783]
|
|
|
|
mean value: 0.031125378608703614
|
|
|
|
key: score_time
|
|
value: [0.02374887 0.02142906 0.02060509 0.02018595 0.02212143 0.0235455
|
|
0.02057886 0.02279162 0.0233283 0.02117443]
|
|
|
|
mean value: 0.021950912475585938
|
|
|
|
key: test_mcc
|
|
value: [0.82575758 0.74242424 0.56818182 0.82575758 0.66414149 0.82575758
|
|
0.74047959 0.82575758 0.63636364 0.46225016]
|
|
|
|
mean value: 0.7116871241591681
|
|
|
|
key: train_mcc
|
|
value: [0.9024367 0.93175328 0.92194936 0.91224062 0.93175328 0.93174679
|
|
0.94163576 0.90259929 0.89324598 0.93243443]
|
|
|
|
mean value: 0.9201795515216483
|
|
|
|
key: test_accuracy
|
|
value: [0.91304348 0.86956522 0.7826087 0.91304348 0.82608696 0.91304348
|
|
0.86956522 0.91304348 0.81818182 0.72727273]
|
|
|
|
mean value: 0.8545454545454545
|
|
|
|
key: train_accuracy
|
|
value: [0.95121951 0.96585366 0.96097561 0.95609756 0.96585366 0.96585366
|
|
0.97073171 0.95121951 0.94660194 0.96601942]
|
|
|
|
mean value: 0.9600426237272082
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.86956522 0.7826087 0.90909091 0.81818182 0.91666667
|
|
0.88 0.91666667 0.81818182 0.7 ]
|
|
|
|
mean value: 0.8520052700922266
|
|
|
|
key: train_fscore
|
|
value: [0.95145631 0.96585366 0.96116505 0.95609756 0.96585366 0.96551724
|
|
0.97029703 0.95049505 0.9468599 0.96650718]
|
|
|
|
mean value: 0.9600102638274448
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.83333333 0.75 0.90909091 0.9 0.91666667
|
|
0.84615385 0.91666667 0.81818182 0.77777778]
|
|
|
|
mean value: 0.8576961926961927
|
|
|
|
key: train_precision
|
|
value: [0.95145631 0.97058824 0.96116505 0.96078431 0.96116505 0.97029703
|
|
0.98 0.96 0.94230769 0.95283019]
|
|
|
|
mean value: 0.9610593867476506
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.90909091 0.81818182 0.90909091 0.75 0.91666667
|
|
0.91666667 0.91666667 0.81818182 0.63636364]
|
|
|
|
mean value: 0.85
|
|
|
|
key: train_recall
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_orig.py:135: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_orig.py:138: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.95145631 0.96116505 0.96116505 0.95145631 0.97058824 0.96078431
|
|
0.96078431 0.94117647 0.95145631 0.98058252]
|
|
|
|
mean value: 0.9590614886731392
|
|
|
|
key: test_roc_auc
|
|
value: [0.91287879 0.87121212 0.78409091 0.91287879 0.82954545 0.91287879
|
|
0.86742424 0.91287879 0.81818182 0.72727273]
|
|
|
|
mean value: 0.8549242424242424
|
|
|
|
key: train_roc_auc
|
|
value: [0.95121835 0.96587664 0.96097468 0.95612031 0.96587664 0.96582905
|
|
0.97068342 0.95117076 0.94660194 0.96601942]
|
|
|
|
mean value: 0.9600371216447744
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.76923077 0.64285714 0.83333333 0.69230769 0.84615385
|
|
0.78571429 0.84615385 0.69230769 0.53846154]
|
|
|
|
mean value: 0.747985347985348
|
|
|
|
key: train_jcc
|
|
value: [0.90740741 0.93396226 0.92523364 0.91588785 0.93396226 0.93333333
|
|
0.94230769 0.90566038 0.89908257 0.93518519]
|
|
|
|
mean value: 0.9232022588028438
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.34404731 0.23672104 0.2441833 0.23749495 0.25613022 0.24685955
|
|
0.24196959 0.28543353 0.30825043 0.23037481]
|
|
|
|
mean value: 0.2631464719772339
|
|
|
|
key: score_time
|
|
value: [0.02091432 0.02341533 0.02169251 0.01936412 0.01624894 0.0224669
|
|
0.01370192 0.01648545 0.02128911 0.02203679]
|
|
|
|
mean value: 0.019761538505554198
|
|
|
|
key: test_mcc
|
|
value: [0.82575758 0.65909298 0.56818182 0.82575758 0.66414149 0.82575758
|
|
0.74047959 0.82575758 0.73029674 0.46225016]
|
|
|
|
mean value: 0.7127473088509607
|
|
|
|
key: train_mcc
|
|
value: [0.9024367 0.93175328 0.92194936 0.91224062 0.95163291 0.93174679
|
|
0.94163576 0.90259929 0.93208276 0.93243443]
|
|
|
|
mean value: 0.9260511918867643
|
|
|
|
key: test_accuracy
|
|
value: [0.91304348 0.82608696 0.7826087 0.91304348 0.82608696 0.91304348
|
|
0.86956522 0.91304348 0.86363636 0.72727273]
|
|
|
|
mean value: 0.8547430830039525
|
|
|
|
key: train_accuracy
|
|
value: [0.95121951 0.96585366 0.96097561 0.95609756 0.97560976 0.96585366
|
|
0.97073171 0.95121951 0.96601942 0.96601942]
|
|
|
|
mean value: 0.9629599810561212
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.8 0.7826087 0.90909091 0.81818182 0.91666667
|
|
0.88 0.91666667 0.85714286 0.7 ]
|
|
|
|
mean value: 0.8489448522492
|
|
|
|
key: train_fscore
|
|
value: [0.95145631 0.96585366 0.96116505 0.95609756 0.97584541 0.96551724
|
|
0.97029703 0.95049505 0.96618357 0.96650718]
|
|
|
|
mean value: 0.9629418061863466
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.88888889 0.75 0.90909091 0.9 0.91666667
|
|
0.84615385 0.91666667 0.9 0.77777778]
|
|
|
|
mean value: 0.8714335664335664
|
|
|
|
key: train_precision
|
|
value: [0.95145631 0.97058824 0.96116505 0.96078431 0.96190476 0.97029703
|
|
0.98 0.96 0.96153846 0.95283019]
|
|
|
|
mean value: 0.9630564350068348
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.72727273 0.81818182 0.90909091 0.75 0.91666667
|
|
0.91666667 0.91666667 0.81818182 0.63636364]
|
|
|
|
mean value: 0.8318181818181818
|
|
|
|
key: train_recall
|
|
value: [0.95145631 0.96116505 0.96116505 0.95145631 0.99019608 0.96078431
|
|
0.96078431 0.94117647 0.97087379 0.98058252]
|
|
|
|
mean value: 0.9629640205596802
|
|
|
|
key: test_roc_auc
|
|
value: [0.91287879 0.8219697 0.78409091 0.91287879 0.82954545 0.91287879
|
|
0.86742424 0.91287879 0.86363636 0.72727273]
|
|
|
|
mean value: 0.8545454545454545
|
|
|
|
key: train_roc_auc
|
|
value: [0.95121835 0.96587664 0.96097468 0.95612031 0.97568056 0.96582905
|
|
0.97068342 0.95117076 0.96601942 0.96601942]
|
|
|
|
mean value: 0.9629592613744528
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.66666667 0.64285714 0.83333333 0.69230769 0.84615385
|
|
0.78571429 0.84615385 0.75 0.53846154]
|
|
|
|
mean value: 0.7434981684981685
|
|
|
|
key: train_jcc
|
|
value: [0.90740741 0.93396226 0.92523364 0.91588785 0.95283019 0.93333333
|
|
0.94230769 0.90566038 0.93457944 0.93518519]
|
|
|
|
mean value: 0.9286387383001737
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0311234 0.03104281 0.03307748 0.03271508 0.03284502 0.03534842
|
|
0.0292809 0.03247356 0.0327239 0.0364511 ]
|
|
|
|
mean value: 0.032708168029785156
|
|
|
|
key: score_time
|
|
value: [0.01306868 0.01228809 0.01467776 0.01206303 0.01464558 0.01219606
|
|
0.01215053 0.01212502 0.0147841 0.01199746]
|
|
|
|
mean value: 0.012999629974365235
|
|
|
|
key: test_mcc
|
|
value: [0.74047959 0.5164589 0.48856385 0.56490196 0.74047959 0.83971912
|
|
0.74047959 0.91666667 0.63636364 0.18257419]
|
|
|
|
mean value: 0.6366687092895582
|
|
|
|
key: train_mcc
|
|
value: [0.86356283 0.84451258 0.87352395 0.8350976 0.82455974 0.84407425
|
|
0.88361919 0.81564443 0.81742389 0.91266437]
|
|
|
|
mean value: 0.8514682834181677
|
|
|
|
key: test_accuracy
|
|
value: [0.86956522 0.73913043 0.73913043 0.7826087 0.86956522 0.91304348
|
|
0.86956522 0.95652174 0.81818182 0.59090909]
|
|
|
|
mean value: 0.8148221343873517
|
|
|
|
key: train_accuracy
|
|
value: [0.93170732 0.92195122 0.93658537 0.91707317 0.91219512 0.92195122
|
|
0.94146341 0.90731707 0.90776699 0.95631068]
|
|
|
|
mean value: 0.9254321572341937
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.76923077 0.75 0.76190476 0.88 0.90909091
|
|
0.88 0.95652174 0.81818182 0.57142857]
|
|
|
|
mean value: 0.8153501426110121
|
|
|
|
key: train_fscore
|
|
value: [0.93269231 0.92380952 0.93779904 0.91943128 0.91262136 0.9223301
|
|
0.94230769 0.90909091 0.91079812 0.95652174]
|
|
|
|
mean value: 0.926740207309033
|
|
|
|
key: test_precision
|
|
value: [0.9 0.66666667 0.69230769 0.8 0.84615385 1.
|
|
0.84615385 1. 0.81818182 0.6 ]
|
|
|
|
mean value: 0.816946386946387
|
|
|
|
key: train_precision
|
|
value: [0.92380952 0.90654206 0.9245283 0.89814815 0.90384615 0.91346154
|
|
0.9245283 0.88785047 0.88181818 0.95192308]
|
|
|
|
mean value: 0.9116455750144694
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.90909091 0.81818182 0.72727273 0.91666667 0.83333333
|
|
0.91666667 0.91666667 0.81818182 0.54545455]
|
|
|
|
mean value: 0.821969696969697
|
|
|
|
key: train_recall
|
|
value: [0.94174757 0.94174757 0.95145631 0.94174757 0.92156863 0.93137255
|
|
0.96078431 0.93137255 0.94174757 0.96116505]
|
|
|
|
mean value: 0.9424709689701123
|
|
|
|
key: test_roc_auc
|
|
value: [0.86742424 0.74621212 0.74242424 0.78030303 0.86742424 0.91666667
|
|
0.86742424 0.95833333 0.81818182 0.59090909]
|
|
|
|
mean value: 0.8155303030303029
|
|
|
|
key: train_roc_auc
|
|
value: [0.9316581 0.92185418 0.93651247 0.91695222 0.91224062 0.92199695
|
|
0.94155721 0.90743385 0.90776699 0.95631068]
|
|
|
|
mean value: 0.925428326670474
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.625 0.6 0.61538462 0.78571429 0.83333333
|
|
0.78571429 0.91666667 0.69230769 0.4 ]
|
|
|
|
mean value: 0.7004120879120879
|
|
|
|
key: train_jcc
|
|
value: [0.87387387 0.85840708 0.88288288 0.85087719 0.83928571 0.85585586
|
|
0.89090909 0.83333333 0.8362069 0.91666667]
|
|
|
|
mean value: 0.8638298586987616
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.93439674 0.79571605 0.78386688 0.91385674 0.78201485 0.82960582
|
|
0.88547063 0.7484262 0.93659878 0.74970579]
|
|
|
|
mean value: 0.8359658479690552
|
|
|
|
key: score_time
|
|
value: [0.01902795 0.01569152 0.0154388 0.01550126 0.01569033 0.01552463
|
|
0.01555157 0.01228809 0.01748419 0.01834369]
|
|
|
|
mean value: 0.016054201126098632
|
|
|
|
key: test_mcc
|
|
value: [0.74047959 0.56818182 0.56818182 0.65151515 0.76764947 0.91666667
|
|
0.56490196 0.91666667 0.75592895 0.46225016]
|
|
|
|
mean value: 0.691242224971122
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99033794]
|
|
|
|
mean value: 0.9990337937660287
|
|
|
|
key: test_accuracy
|
|
value: [0.86956522 0.7826087 0.7826087 0.82608696 0.86956522 0.95652174
|
|
0.7826087 0.95652174 0.86363636 0.72727273]
|
|
|
|
mean value: 0.841699604743083
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99514563]
|
|
|
|
mean value: 0.9995145631067961
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.7826087 0.7826087 0.81818182 0.85714286 0.95652174
|
|
0.8 0.95652174 0.84210526 0.7 ]
|
|
|
|
mean value: 0.8352833665190644
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99512195]
|
|
|
|
mean value: 0.9995121951219512
|
|
|
|
key: test_precision
|
|
value: [0.9 0.75 0.75 0.81818182 1. 1.
|
|
0.76923077 1. 1. 0.77777778]
|
|
|
|
mean value: 0.8765190365190365
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.81818182 0.81818182 0.81818182 0.75 0.91666667
|
|
0.83333333 0.91666667 0.72727273 0.63636364]
|
|
|
|
mean value: 0.8053030303030303
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99029126]
|
|
|
|
mean value: 0.9990291262135922
|
|
|
|
key: test_roc_auc
|
|
value: [0.86742424 0.78409091 0.78409091 0.82575758 0.875 0.95833333
|
|
0.78030303 0.95833333 0.86363636 0.72727273]
|
|
|
|
mean value: 0.8424242424242424
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99514563]
|
|
|
|
mean value: 0.9995145631067961
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.64285714 0.64285714 0.69230769 0.75 0.91666667
|
|
0.66666667 0.91666667 0.72727273 0.53846154]
|
|
|
|
mean value: 0.7243756243756244
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.99029126]
|
|
|
|
mean value: 0.9990291262135922
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02677464 0.01045084 0.01013684 0.00999856 0.01005387 0.01009083
|
|
0.00991368 0.0105505 0.01012111 0.01038074]
|
|
|
|
mean value: 0.011847162246704101
|
|
|
|
key: score_time
|
|
value: [0.01160312 0.00989413 0.00953531 0.00954342 0.00953102 0.0099299
|
|
0.00961757 0.00996542 0.00982594 0.00982404]
|
|
|
|
mean value: 0.009926986694335938
|
|
|
|
key: test_mcc
|
|
value: [0.44411739 0.41096386 0.41096386 0.15096491 0.22407133 0.74047959
|
|
0.42228828 0.24960096 0.09759001 0.18898224]
|
|
|
|
mean value: 0.3340022427057291
|
|
|
|
key: train_mcc
|
|
value: [0.36627048 0.417866 0.45930893 0.40305908 0.501235 0.431714
|
|
0.49387839 0.43730041 0.46621721 0.50903935]
|
|
|
|
mean value: 0.44858888387912876
|
|
|
|
key: test_accuracy
|
|
value: [0.69565217 0.69565217 0.69565217 0.56521739 0.60869565 0.86956522
|
|
0.69565217 0.60869565 0.54545455 0.59090909]
|
|
|
|
mean value: 0.6571146245059288
|
|
|
|
key: train_accuracy
|
|
value: [0.65853659 0.69756098 0.71707317 0.67317073 0.74146341 0.70243902
|
|
0.73170732 0.70731707 0.73300971 0.74271845]
|
|
|
|
mean value: 0.7104996448022732
|
|
|
|
key: test_fscore
|
|
value: [0.74074074 0.72 0.72 0.61538462 0.68965517 0.88
|
|
0.75862069 0.70967742 0.61538462 0.64 ]
|
|
|
|
mean value: 0.7089463252933775
|
|
|
|
key: train_fscore
|
|
value: [0.72868217 0.74166667 0.75833333 0.74131274 0.77056277 0.74476987
|
|
0.76987448 0.74576271 0.72906404 0.77637131]
|
|
|
|
mean value: 0.7506400093172734
|
|
|
|
key: test_precision
|
|
value: [0.625 0.64285714 0.64285714 0.53333333 0.58823529 0.84615385
|
|
0.64705882 0.57894737 0.53333333 0.57142857]
|
|
|
|
mean value: 0.6209204856031482
|
|
|
|
key: train_precision
|
|
value: [0.60645161 0.64963504 0.66423358 0.61538462 0.68992248 0.64963504
|
|
0.67153285 0.65671642 0.74 0.68656716]
|
|
|
|
mean value: 0.6630078787347913
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.81818182 0.81818182 0.72727273 0.83333333 0.91666667
|
|
0.91666667 0.91666667 0.72727273 0.72727273]
|
|
|
|
mean value: 0.831060606060606
|
|
|
|
key: train_recall
|
|
value: [0.91262136 0.86407767 0.88349515 0.93203883 0.87254902 0.87254902
|
|
0.90196078 0.8627451 0.7184466 0.89320388]
|
|
|
|
mean value: 0.8713687416714259
|
|
|
|
key: test_roc_auc
|
|
value: [0.70454545 0.70075758 0.70075758 0.5719697 0.59848485 0.86742424
|
|
0.68560606 0.59469697 0.54545455 0.59090909]
|
|
|
|
mean value: 0.656060606060606
|
|
|
|
key: train_roc_auc
|
|
value: [0.65729107 0.69674472 0.71625738 0.67190177 0.74209975 0.7032648
|
|
0.73253379 0.70807158 0.73300971 0.74271845]
|
|
|
|
mean value: 0.7103893013516086
|
|
|
|
key: test_jcc
|
|
value: [0.58823529 0.5625 0.5625 0.44444444 0.52631579 0.78571429
|
|
0.61111111 0.55 0.44444444 0.47058824]
|
|
|
|
mean value: 0.5545853604599734
|
|
|
|
key: train_jcc
|
|
value: [0.57317073 0.58940397 0.61073826 0.58895706 0.62676056 0.59333333
|
|
0.62585034 0.59459459 0.57364341 0.63448276]
|
|
|
|
mean value: 0.6010935016383199
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00957608 0.0098474 0.00965476 0.00997877 0.01009202 0.00920963
|
|
0.00911617 0.00889516 0.00902343 0.00907636]
|
|
|
|
mean value: 0.009446978569030762
|
|
|
|
key: score_time
|
|
value: [0.00914979 0.00911403 0.00903225 0.00959492 0.00927424 0.0087173
|
|
0.00860095 0.00873423 0.00871611 0.00861526]
|
|
|
|
mean value: 0.008954906463623047
|
|
|
|
key: test_mcc
|
|
value: [0.58002308 0.12878788 0.12336594 0.21452908 0.39393939 0.39393939
|
|
0.05427825 0.39393939 0.18257419 0.20412415]
|
|
|
|
mean value: 0.26695007377892416
|
|
|
|
key: train_mcc
|
|
value: [0.43994849 0.50824626 0.49637007 0.45056913 0.46832513 0.45757548
|
|
0.46948042 0.48928361 0.48018451 0.46191786]
|
|
|
|
mean value: 0.47219009507999327
|
|
|
|
key: test_accuracy
|
|
value: [0.7826087 0.56521739 0.56521739 0.60869565 0.69565217 0.69565217
|
|
0.52173913 0.69565217 0.59090909 0.59090909]
|
|
|
|
mean value: 0.6312252964426878
|
|
|
|
key: train_accuracy
|
|
value: [0.71707317 0.75121951 0.74634146 0.72195122 0.72682927 0.72682927
|
|
0.73170732 0.74146341 0.73786408 0.72815534]
|
|
|
|
mean value: 0.7329434051622069
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.54545455 0.44444444 0.52631579 0.69565217 0.69565217
|
|
0.47619048 0.69565217 0.57142857 0.47058824]
|
|
|
|
mean value: 0.5858220689288127
|
|
|
|
key: train_fscore
|
|
value: [0.69473684 0.73298429 0.73195876 0.6984127 0.68539326 0.70526316
|
|
0.70588235 0.71657754 0.71875 0.70526316]
|
|
|
|
mean value: 0.7095222063862845
|
|
|
|
key: test_precision
|
|
value: [0.875 0.54545455 0.57142857 0.625 0.72727273 0.72727273
|
|
0.55555556 0.72727273 0.6 0.66666667]
|
|
|
|
mean value: 0.6620923520923521
|
|
|
|
key: train_precision
|
|
value: [0.75862069 0.79545455 0.78021978 0.76744186 0.80263158 0.76136364
|
|
0.77647059 0.78823529 0.7752809 0.77011494]
|
|
|
|
mean value: 0.7775833814863701
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.54545455 0.36363636 0.45454545 0.66666667 0.66666667
|
|
0.41666667 0.66666667 0.54545455 0.36363636]
|
|
|
|
mean value: 0.5325757575757576
|
|
|
|
key: train_recall
|
|
value: [0.6407767 0.67961165 0.68932039 0.6407767 0.59803922 0.65686275
|
|
0.64705882 0.65686275 0.66990291 0.65048544]
|
|
|
|
mean value: 0.6529697315819532
|
|
|
|
key: test_roc_auc
|
|
value: [0.77651515 0.56439394 0.55681818 0.60227273 0.6969697 0.6969697
|
|
0.52651515 0.6969697 0.59090909 0.59090909]
|
|
|
|
mean value: 0.6299242424242424
|
|
|
|
key: train_roc_auc
|
|
value: [0.71744717 0.75157053 0.74662098 0.72234913 0.72620407 0.72648962
|
|
0.7312964 0.74105273 0.73786408 0.72815534]
|
|
|
|
mean value: 0.7329050066628594
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.375 0.28571429 0.35714286 0.53333333 0.53333333
|
|
0.3125 0.53333333 0.4 0.30769231]
|
|
|
|
mean value: 0.4221382783882784
|
|
|
|
key: train_jcc
|
|
value: [0.53225806 0.5785124 0.57723577 0.53658537 0.52136752 0.54471545
|
|
0.54545455 0.55833333 0.56097561 0.54471545]
|
|
|
|
mean value: 0.5500153503642167
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00882125 0.00910711 0.01014447 0.00869751 0.00881767 0.0097332
|
|
0.00888371 0.00917006 0.00906801 0.00977707]
|
|
|
|
mean value: 0.009222006797790528
|
|
|
|
key: score_time
|
|
value: [0.01601529 0.01748419 0.01591444 0.00995803 0.00983834 0.01029491
|
|
0.01002693 0.01065493 0.01047945 0.01068568]
|
|
|
|
mean value: 0.01213521957397461
|
|
|
|
key: test_mcc
|
|
value: [ 0.12406456 -0.05427825 0.22407133 0.39727608 -0.03816905 0.15096491
|
|
0.23262105 0.21452908 -0.09245003 -0.18257419]
|
|
|
|
mean value: 0.0976055502533363
|
|
|
|
key: train_mcc
|
|
value: [0.54175 0.5037683 0.49637007 0.50824626 0.49294992 0.54256731
|
|
0.54702284 0.44388387 0.495239 0.55433939]
|
|
|
|
mean value: 0.5126136961359685
|
|
|
|
key: test_accuracy
|
|
value: [0.56521739 0.47826087 0.60869565 0.69565217 0.47826087 0.56521739
|
|
0.60869565 0.60869565 0.45454545 0.40909091]
|
|
|
|
mean value: 0.5472332015810277
|
|
|
|
key: train_accuracy
|
|
value: [0.77073171 0.75121951 0.74634146 0.75121951 0.74634146 0.77073171
|
|
0.77073171 0.72195122 0.74757282 0.77669903]
|
|
|
|
mean value: 0.7553540137343121
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.4 0.47058824 0.63157895 0.45454545 0.5
|
|
0.57142857 0.66666667 0.4 0.38095238]
|
|
|
|
mean value: 0.49757602562556125
|
|
|
|
key: train_fscore
|
|
value: [0.76847291 0.74371859 0.73195876 0.73298429 0.74 0.76142132
|
|
0.75132275 0.71921182 0.75 0.77 ]
|
|
|
|
mean value: 0.7469090449228885
|
|
|
|
key: test_precision
|
|
value: [0.55555556 0.44444444 0.66666667 0.75 0.5 0.625
|
|
0.66666667 0.6 0.44444444 0.4 ]
|
|
|
|
mean value: 0.5652777777777778
|
|
|
|
key: train_precision
|
|
value: [0.78 0.77083333 0.78021978 0.79545455 0.75510204 0.78947368
|
|
0.81609195 0.72277228 0.74285714 0.79381443]
|
|
|
|
mean value: 0.7746619191132057
|
|
|
|
key: test_recall
|
|
value: [0.45454545 0.36363636 0.36363636 0.54545455 0.41666667 0.41666667
|
|
0.5 0.75 0.36363636 0.36363636]
|
|
|
|
mean value: 0.4537878787878788
|
|
|
|
key: train_recall
|
|
value: [0.75728155 0.7184466 0.68932039 0.67961165 0.7254902 0.73529412
|
|
0.69607843 0.71568627 0.75728155 0.74757282]
|
|
|
|
mean value: 0.722206358271464
|
|
|
|
key: test_roc_auc
|
|
value: [0.56060606 0.47348485 0.59848485 0.68939394 0.48106061 0.5719697
|
|
0.61363636 0.60227273 0.45454545 0.40909091]
|
|
|
|
mean value: 0.5454545454545454
|
|
|
|
key: train_roc_auc
|
|
value: [0.77079764 0.75138016 0.74662098 0.75157053 0.74624024 0.77055968
|
|
0.77036931 0.72192081 0.74757282 0.77669903]
|
|
|
|
mean value: 0.7553731201218352
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.25 0.30769231 0.46153846 0.29411765 0.33333333
|
|
0.4 0.5 0.25 0.23529412]
|
|
|
|
mean value: 0.3365309200603318
|
|
|
|
key: train_jcc
|
|
value: [0.624 0.592 0.57723577 0.5785124 0.58730159 0.6147541
|
|
0.60169492 0.56153846 0.6 0.62601626]
|
|
|
|
mean value: 0.5963053491669482
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01364207 0.01871586 0.01185751 0.01367044 0.01809502 0.01582408
|
|
0.01185703 0.01197171 0.0172205 0.0127852 ]
|
|
|
|
mean value: 0.014563941955566406
|
|
|
|
key: score_time
|
|
value: [0.01032472 0.01589632 0.01048732 0.01059628 0.01564956 0.00968361
|
|
0.00964212 0.01131916 0.01068449 0.00989556]
|
|
|
|
mean value: 0.011417913436889648
|
|
|
|
key: test_mcc
|
|
value: [0.47727273 0.56818182 0.31298622 0.38932432 0.47727273 0.58930667
|
|
0.56490196 0.65909298 0.18898224 0. ]
|
|
|
|
mean value: 0.4227321652529521
|
|
|
|
key: train_mcc
|
|
value: [0.81495251 0.73751939 0.82438607 0.74685628 0.71733345 0.74633543
|
|
0.76638754 0.72814868 0.71088536 0.77761579]
|
|
|
|
mean value: 0.7570420501696654
|
|
|
|
key: test_accuracy
|
|
value: [0.73913043 0.7826087 0.65217391 0.69565217 0.73913043 0.7826087
|
|
0.7826087 0.82608696 0.59090909 0.5 ]
|
|
|
|
mean value: 0.7090909090909091
|
|
|
|
key: train_accuracy
|
|
value: [0.90731707 0.86829268 0.91219512 0.87317073 0.85853659 0.87317073
|
|
0.88292683 0.86341463 0.85436893 0.88834951]
|
|
|
|
mean value: 0.8781742836845844
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.7826087 0.66666667 0.66666667 0.75 0.76190476
|
|
0.8 0.84615385 0.64 0.52173913]
|
|
|
|
mean value: 0.7163012494751625
|
|
|
|
key: train_fscore
|
|
value: [0.90909091 0.86567164 0.91262136 0.87619048 0.85572139 0.87254902
|
|
0.88 0.86666667 0.85981308 0.89099526]
|
|
|
|
mean value: 0.8789319810380724
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.75 0.61538462 0.7 0.75 0.88888889
|
|
0.76923077 0.78571429 0.57142857 0.5 ]
|
|
|
|
mean value: 0.7057919857919858
|
|
|
|
key: train_precision
|
|
value: [0.89622642 0.8877551 0.91262136 0.85981308 0.86868687 0.87254902
|
|
0.89795918 0.84259259 0.82882883 0.87037037]
|
|
|
|
mean value: 0.8737402824230579
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.81818182 0.72727273 0.63636364 0.75 0.66666667
|
|
0.83333333 0.91666667 0.72727273 0.54545455]
|
|
|
|
mean value: 0.7348484848484849
|
|
|
|
key: train_recall
|
|
value: [0.9223301 0.84466019 0.91262136 0.89320388 0.84313725 0.87254902
|
|
0.8627451 0.89215686 0.89320388 0.91262136]
|
|
|
|
mean value: 0.8849229011993147
|
|
|
|
key: test_roc_auc
|
|
value: [0.73863636 0.78409091 0.65530303 0.69318182 0.73863636 0.78787879
|
|
0.78030303 0.8219697 0.59090909 0.5 ]
|
|
|
|
mean value: 0.7090909090909091
|
|
|
|
key: train_roc_auc
|
|
value: [0.90724348 0.86840853 0.91219303 0.87307253 0.85846183 0.87316771
|
|
0.88282886 0.86355416 0.85436893 0.88834951]
|
|
|
|
mean value: 0.8781648581762802
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.64285714 0.5 0.5 0.6 0.61538462
|
|
0.66666667 0.73333333 0.47058824 0.35294118]
|
|
|
|
mean value: 0.5653199741435035
|
|
|
|
key: train_jcc
|
|
value: [0.83333333 0.76315789 0.83928571 0.77966102 0.74782609 0.77391304
|
|
0.78571429 0.76470588 0.75409836 0.8034188 ]
|
|
|
|
mean value: 0.7845114421881593
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.26194549 1.00380898 0.56620073 0.398633 0.38738036 0.52770424
|
|
0.82983422 0.21668768 0.39636707 0.45531631]
|
|
|
|
mean value: 0.6043878078460694
|
|
|
|
key: score_time
|
|
value: [0.01219726 0.01478028 0.0121758 0.01218724 0.01211095 0.01325655
|
|
0.01218605 0.01219201 0.01216078 0.01219201]
|
|
|
|
mean value: 0.012543892860412598
|
|
|
|
key: test_mcc
|
|
value: [ 0.65151515 0.56490196 0.41096386 0.38932432 0.56879646 0.76277007
|
|
0.83971912 0.12844577 0.09090909 -0.18257419]
|
|
|
|
mean value: 0.42247716177937644
|
|
|
|
key: train_mcc
|
|
value: [0.86600321 0.95126131 0.71892689 0.58007639 0.61699176 0.64768695
|
|
0.76036002 0.52267493 0.58321184 0.80643358]
|
|
|
|
mean value: 0.7053626889094041
|
|
|
|
key: test_accuracy
|
|
value: [0.82608696 0.7826087 0.69565217 0.69565217 0.73913043 0.86956522
|
|
0.91304348 0.56521739 0.54545455 0.40909091]
|
|
|
|
mean value: 0.7041501976284584
|
|
|
|
key: train_accuracy
|
|
value: [0.93170732 0.97560976 0.84390244 0.76097561 0.7902439 0.79512195
|
|
0.87804878 0.75609756 0.79126214 0.90291262]
|
|
|
|
mean value: 0.8425882074354725
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.76190476 0.72 0.66666667 0.66666667 0.88888889
|
|
0.90909091 0.66666667 0.54545455 0.43478261]
|
|
|
|
mean value: 0.7078303532216575
|
|
|
|
key: train_fscore
|
|
value: [0.93457944 0.97584541 0.86440678 0.80478088 0.74556213 0.82926829
|
|
0.87046632 0.77678571 0.78606965 0.9047619 ]
|
|
|
|
mean value: 0.8492526520928274
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.8 0.64285714 0.7 1. 0.8
|
|
1. 0.55555556 0.54545455 0.41666667]
|
|
|
|
mean value: 0.7278715728715729
|
|
|
|
key: train_precision
|
|
value: [0.9009009 0.97115385 0.76691729 0.68243243 0.94029851 0.70833333
|
|
0.92307692 0.71311475 0.80612245 0.88785047]
|
|
|
|
mean value: 0.8300200906960877
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.72727273 0.81818182 0.63636364 0.5 1.
|
|
0.83333333 0.83333333 0.54545455 0.45454545]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_recall
|
|
value: [0.97087379 0.98058252 0.99029126 0.98058252 0.61764706 1.
|
|
0.82352941 0.85294118 0.76699029 0.9223301 ]
|
|
|
|
mean value: 0.8905768132495717
|
|
|
|
key: test_roc_auc
|
|
value: [0.82575758 0.78030303 0.70075758 0.69318182 0.75 0.86363636
|
|
0.91666667 0.5530303 0.54545455 0.40909091]
|
|
|
|
mean value: 0.7037878787878787
|
|
|
|
key: train_roc_auc
|
|
value: [0.93151532 0.97558538 0.84318485 0.75989911 0.78940605 0.7961165
|
|
0.87778412 0.75656768 0.79126214 0.90291262]
|
|
|
|
mean value: 0.8424233771178374
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.61538462 0.5625 0.5 0.5 0.8
|
|
0.83333333 0.5 0.375 0.27777778]
|
|
|
|
mean value: 0.5656303418803419
|
|
|
|
key: train_jcc
|
|
value: [0.87719298 0.95283019 0.76119403 0.67333333 0.59433962 0.70833333
|
|
0.7706422 0.6350365 0.64754098 0.82608696]
|
|
|
|
mean value: 0.7446530128607832
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01721716 0.01397586 0.01314878 0.01377201 0.01334882 0.01375103
|
|
0.01316309 0.0134654 0.01368165 0.01338196]
|
|
|
|
mean value: 0.013890576362609864
|
|
|
|
key: score_time
|
|
value: [0.01574564 0.00902081 0.00891137 0.00908399 0.00947738 0.0089283
|
|
0.00954652 0.00874829 0.00894642 0.00875568]
|
|
|
|
mean value: 0.009716439247131347
|
|
|
|
key: test_mcc
|
|
value: [0.65909298 0.82575758 0.65151515 1. 1. 0.91666667
|
|
1. 0.74242424 0.73029674 1. ]
|
|
|
|
mean value: 0.8525753362069441
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.82608696 0.91304348 0.82608696 1. 1. 0.95652174
|
|
1. 0.86956522 0.86363636 1. ]
|
|
|
|
mean value: 0.925494071146245
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.90909091 0.81818182 1. 1. 0.95652174
|
|
1. 0.86956522 0.85714286 1. ]
|
|
|
|
mean value: 0.9210502540937323
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.90909091 0.81818182 1. 1. 1.
|
|
1. 0.90909091 0.9 1. ]
|
|
|
|
mean value: 0.9425252525252525
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.90909091 0.81818182 1. 1. 0.91666667
|
|
1. 0.83333333 0.81818182 1. ]
|
|
|
|
mean value: 0.9022727272727273
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8219697 0.91287879 0.82575758 1. 1. 0.95833333
|
|
1. 0.87121212 0.86363636 1. ]
|
|
|
|
mean value: 0.9253787878787879
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.83333333 0.69230769 1. 1. 0.91666667
|
|
1. 0.76923077 0.75 1. ]
|
|
|
|
mean value: 0.8628205128205129
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09688735 0.09758043 0.09672451 0.0968442 0.09741735 0.09862638
|
|
0.09707808 0.09623432 0.09967422 0.10141206]
|
|
|
|
mean value: 0.09784789085388183
|
|
|
|
key: score_time
|
|
value: [0.01796174 0.01781249 0.01779795 0.01884341 0.0174849 0.01741695
|
|
0.01794338 0.01800895 0.0174613 0.017524 ]
|
|
|
|
mean value: 0.017825508117675783
|
|
|
|
key: test_mcc
|
|
value: [0.76764947 0.91666667 0.56818182 0.38932432 0.41096386 0.82575758
|
|
0.82575758 0.83743579 0.83205029 0.45454545]
|
|
|
|
mean value: 0.6828332828329148
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.86956522 0.95652174 0.7826087 0.69565217 0.69565217 0.91304348
|
|
0.91304348 0.91304348 0.90909091 0.72727273]
|
|
|
|
mean value: 0.8375494071146244
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88 0.95652174 0.7826087 0.66666667 0.66666667 0.91666667
|
|
0.91666667 0.92307692 0.9 0.72727273]
|
|
|
|
mean value: 0.8336146751798925
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.78571429 0.91666667 0.75 0.7 0.77777778 0.91666667
|
|
0.91666667 0.85714286 1. 0.72727273]
|
|
|
|
mean value: 0.8347907647907647
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.81818182 0.63636364 0.58333333 0.91666667
|
|
0.91666667 1. 0.81818182 0.72727273]
|
|
|
|
mean value: 0.8416666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.95833333 0.78409091 0.69318182 0.70075758 0.91287879
|
|
0.91287879 0.90909091 0.90909091 0.72727273]
|
|
|
|
mean value: 0.8382575757575758
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.78571429 0.91666667 0.64285714 0.5 0.5 0.84615385
|
|
0.84615385 0.85714286 0.81818182 0.57142857]
|
|
|
|
mean value: 0.7284299034299034
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00908017 0.00915766 0.00907207 0.00912404 0.00897431 0.00912595
|
|
0.00911784 0.00910449 0.01033187 0.00916791]
|
|
|
|
mean value: 0.00922563076019287
|
|
|
|
key: score_time
|
|
value: [0.00860286 0.00855422 0.00866795 0.00879788 0.00872707 0.00869775
|
|
0.00873923 0.00875854 0.00952125 0.00869274]
|
|
|
|
mean value: 0.008775949478149414
|
|
|
|
key: test_mcc
|
|
value: [0.03816905 0.56490196 0.30240737 0.03178209 0.65151515 0.5164589
|
|
0.38932432 0.74242424 0.56694671 0.36514837]
|
|
|
|
mean value: 0.416907815921681
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.52173913 0.7826087 0.65217391 0.52173913 0.82608696 0.73913043
|
|
0.69565217 0.86956522 0.77272727 0.68181818]
|
|
|
|
mean value: 0.7063241106719368
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.47619048 0.76190476 0.6 0.42105263 0.83333333 0.7
|
|
0.72 0.86956522 0.73684211 0.66666667]
|
|
|
|
mean value: 0.6785555192328647
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 0.8 0.66666667 0.5 0.83333333 0.875
|
|
0.69230769 0.90909091 0.875 0.7 ]
|
|
|
|
mean value: 0.7351398601398601
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.45454545 0.72727273 0.54545455 0.36363636 0.83333333 0.58333333
|
|
0.75 0.83333333 0.63636364 0.63636364]
|
|
|
|
mean value: 0.6363636363636364
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.51893939 0.78030303 0.64772727 0.51515152 0.82575758 0.74621212
|
|
0.69318182 0.87121212 0.77272727 0.68181818]
|
|
|
|
mean value: 0.7053030303030303
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.3125 0.61538462 0.42857143 0.26666667 0.71428571 0.53846154
|
|
0.5625 0.76923077 0.58333333 0.5 ]
|
|
|
|
mean value: 0.5290934065934066
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.33109474 1.30057764 1.29539728 1.27576232 1.28721762 1.28771162
|
|
1.30014181 1.29339123 1.31685042 1.30856156]
|
|
|
|
mean value: 1.2996706247329712
|
|
|
|
key: score_time
|
|
value: [0.09498 0.09537101 0.09173775 0.0889883 0.09702682 0.0884161
|
|
0.09529257 0.09618378 0.09623957 0.09434962]
|
|
|
|
mean value: 0.09385855197906494
|
|
|
|
key: test_mcc
|
|
value: [0.65909298 0.91666667 0.56818182 0.74047959 0.66414149 0.91666667
|
|
0.91605722 0.74242424 0.81818182 0.73029674]
|
|
|
|
mean value: 0.7672189240726363
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.82608696 0.95652174 0.7826087 0.86956522 0.82608696 0.95652174
|
|
0.95652174 0.86956522 0.90909091 0.86363636]
|
|
|
|
mean value: 0.8816205533596838
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.95652174 0.7826087 0.85714286 0.81818182 0.95652174
|
|
0.96 0.86956522 0.90909091 0.85714286]
|
|
|
|
mean value: 0.876677583286279
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.91666667 0.75 0.9 0.9 1.
|
|
0.92307692 0.90909091 0.90909091 0.9 ]
|
|
|
|
mean value: 0.8996814296814297
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.72727273 1. 0.81818182 0.81818182 0.75 0.91666667
|
|
1. 0.83333333 0.90909091 0.81818182]
|
|
|
|
mean value: 0.8590909090909091
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8219697 0.95833333 0.78409091 0.86742424 0.82954545 0.95833333
|
|
0.95454545 0.87121212 0.90909091 0.86363636]
|
|
|
|
mean value: 0.8818181818181818
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.91666667 0.64285714 0.75 0.69230769 0.91666667
|
|
0.92307692 0.76923077 0.83333333 0.75 ]
|
|
|
|
mean value: 0.7860805860805861
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.93184686 0.88155961 0.97611141 0.94901252 0.89592099 0.96174765
|
|
0.93060923 0.96457934 0.91490054 0.93486118]
|
|
|
|
mean value: 0.934114933013916
|
|
|
|
key: score_time
|
|
value: [0.24499297 0.24134612 0.18891835 0.13461995 0.24500108 0.2090826
|
|
0.13763881 0.20907116 0.14146399 0.24355125]
|
|
|
|
mean value: 0.19956862926483154
|
|
|
|
key: test_mcc
|
|
value: [0.65909298 0.76764947 0.58930667 0.65909298 0.74242424 0.83971912
|
|
0.82575758 0.83971912 0.73029674 0.54772256]
|
|
|
|
mean value: 0.7200781469442072
|
|
|
|
key: train_mcc
|
|
value: [0.9707786 0.94163576 0.97114302 0.961154 0.96116136 0.9707786
|
|
0.95163291 0.96116136 0.95150116 0.95186015]
|
|
|
|
mean value: 0.9592806896568655
|
|
|
|
key: test_accuracy
|
|
value: [0.82608696 0.86956522 0.7826087 0.82608696 0.86956522 0.91304348
|
|
0.91304348 0.91304348 0.86363636 0.77272727]
|
|
|
|
mean value: 0.8549407114624505
|
|
|
|
key: train_accuracy
|
|
value: [0.98536585 0.97073171 0.98536585 0.9804878 0.9804878 0.98536585
|
|
0.97560976 0.9804878 0.97572816 0.97572816]
|
|
|
|
mean value: 0.9795358749704002
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.88 0.8 0.8 0.86956522 0.90909091
|
|
0.91666667 0.90909091 0.86956522 0.7826087 ]
|
|
|
|
mean value: 0.8536587615283268
|
|
|
|
key: train_fscore
|
|
value: [0.98536585 0.97115385 0.98564593 0.98076923 0.98058252 0.98536585
|
|
0.97584541 0.98058252 0.97584541 0.97607656]
|
|
|
|
mean value: 0.9797233142078156
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.78571429 0.71428571 0.88888889 0.90909091 1.
|
|
0.91666667 1. 0.83333333 0.75 ]
|
|
|
|
mean value: 0.8686868686868687
|
|
|
|
key: train_precision /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
value: [0.99019608 0.96190476 0.97169811 0.97142857 0.97115385 0.98058252
|
|
0.96190476 0.97115385 0.97115385 0.96226415]
|
|
|
|
mean value: 0.9713440500553794
|
|
|
|
key: test_recall
|
|
value: [0.72727273 1. 0.90909091 0.72727273 0.83333333 0.83333333
|
|
0.91666667 0.83333333 0.90909091 0.81818182]
|
|
|
|
mean value: 0.8507575757575758
|
|
|
|
key: train_recall
|
|
value: [0.98058252 0.98058252 1. 0.99029126 0.99019608 0.99019608
|
|
0.99019608 0.99019608 0.98058252 0.99029126]
|
|
|
|
mean value: 0.9883114410812869
|
|
|
|
key: test_roc_auc
|
|
value: [0.8219697 0.875 0.78787879 0.8219697 0.87121212 0.91666667
|
|
0.91287879 0.91666667 0.86363636 0.77272727]
|
|
|
|
mean value: 0.8560606060606061
|
|
|
|
key: train_roc_auc
|
|
value: [0.9853893 0.97068342 0.98529412 0.98043975 0.98053493 0.9853893
|
|
0.97568056 0.98053493 0.97572816 0.97572816]
|
|
|
|
mean value: 0.9795402627070245
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.78571429 0.66666667 0.66666667 0.76923077 0.83333333
|
|
0.84615385 0.83333333 0.76923077 0.64285714]
|
|
|
|
mean value: 0.747985347985348
|
|
|
|
key: train_jcc
|
|
value: [0.97115385 0.94392523 0.97169811 0.96226415 0.96190476 0.97115385
|
|
0.95283019 0.96190476 0.95283019 0.95327103]
|
|
|
|
mean value: 0.9602936119308894
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02315831 0.00965405 0.00990772 0.00992966 0.00958633 0.00925899
|
|
0.00968218 0.00935221 0.00954723 0.00899029]
|
|
|
|
mean value: 0.010906696319580078
|
|
|
|
key: score_time
|
|
value: [0.010185 0.0090487 0.00989842 0.00874281 0.00877428 0.00885487
|
|
0.00888062 0.00934005 0.00959086 0.009305 ]
|
|
|
|
mean value: 0.00926206111907959
|
|
|
|
key: test_mcc
|
|
value: [0.58002308 0.12878788 0.12336594 0.21452908 0.39393939 0.39393939
|
|
0.05427825 0.39393939 0.18257419 0.20412415]
|
|
|
|
mean value: 0.26695007377892416
|
|
|
|
key: train_mcc
|
|
value: [0.43994849 0.50824626 0.49637007 0.45056913 0.46832513 0.45757548
|
|
0.46948042 0.48928361 0.48018451 0.46191786]
|
|
|
|
mean value: 0.47219009507999327
|
|
|
|
key: test_accuracy
|
|
value: [0.7826087 0.56521739 0.56521739 0.60869565 0.69565217 0.69565217
|
|
0.52173913 0.69565217 0.59090909 0.59090909]
|
|
|
|
mean value: 0.6312252964426878
|
|
|
|
key: train_accuracy
|
|
value: [0.71707317 0.75121951 0.74634146 0.72195122 0.72682927 0.72682927
|
|
0.73170732 0.74146341 0.73786408 0.72815534]
|
|
|
|
mean value: 0.7329434051622069
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.54545455 0.44444444 0.52631579 0.69565217 0.69565217
|
|
0.47619048 0.69565217 0.57142857 0.47058824]
|
|
|
|
mean value: 0.5858220689288127
|
|
|
|
key: train_fscore
|
|
value: [0.69473684 0.73298429 0.73195876 0.6984127 0.68539326 0.70526316
|
|
0.70588235 0.71657754 0.71875 0.70526316]
|
|
|
|
mean value: 0.7095222063862845
|
|
|
|
key: test_precision
|
|
value: [0.875 0.54545455 0.57142857 0.625 0.72727273 0.72727273
|
|
0.55555556 0.72727273 0.6 0.66666667]
|
|
|
|
mean value: 0.6620923520923521
|
|
|
|
key: train_precision
|
|
value: [0.75862069 0.79545455 0.78021978 0.76744186 0.80263158 0.76136364
|
|
0.77647059 0.78823529 0.7752809 0.77011494]
|
|
|
|
mean value: 0.7775833814863701
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.54545455 0.36363636 0.45454545 0.66666667 0.66666667
|
|
0.41666667 0.66666667 0.54545455 0.36363636]
|
|
|
|
mean value: 0.5325757575757576
|
|
|
|
key: train_recall
|
|
value: [0.6407767 0.67961165 0.68932039 0.6407767 0.59803922 0.65686275
|
|
0.64705882 0.65686275 0.66990291 0.65048544]
|
|
|
|
mean value: 0.6529697315819532
|
|
|
|
key: test_roc_auc
|
|
value: [0.77651515 0.56439394 0.55681818 0.60227273 0.6969697 0.6969697
|
|
0.52651515 0.6969697 0.59090909 0.59090909]
|
|
|
|
mean value: 0.6299242424242424
|
|
|
|
key: train_roc_auc
|
|
value: [0.71744717 0.75157053 0.74662098 0.72234913 0.72620407 0.72648962
|
|
0.7312964 0.74105273 0.73786408 0.72815534]
|
|
|
|
mean value: 0.7329050066628594
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.375 0.28571429 0.35714286 0.53333333 0.53333333
|
|
0.3125 0.53333333 0.4 0.30769231]
|
|
|
|
mean value: 0.4221382783882784
|
|
|
|
key: train_jcc
|
|
value: [0.53225806 0.5785124 0.57723577 0.53658537 0.52136752 0.54471545
|
|
0.54545455 0.55833333 0.56097561 0.54471545]
|
|
|
|
mean value: 0.5500153503642167
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.07360411 0.05273223 0.05829406 0.06245947 0.0599854 0.05718923
|
|
0.06659317 0.06024432 0.06122375 0.06588459]
|
|
|
|
mean value: 0.06182103157043457
|
|
|
|
key: score_time
|
|
value: [0.01040006 0.01061964 0.01049256 0.01047063 0.01048827 0.01023436
|
|
0.01153612 0.01126742 0.0113802 0.01140666]
|
|
|
|
mean value: 0.010829591751098632
|
|
|
|
key: test_mcc
|
|
value: [0.91666667 0.91666667 0.74242424 1. 0.83971912 0.83971912
|
|
1. 1. 0.91287093 1. ]
|
|
|
|
mean value: 0.9168066750452115
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.95652174 0.95652174 0.86956522 1. 0.91304348 0.91304348
|
|
1. 1. 0.95454545 1. ]
|
|
|
|
mean value: 0.9563241106719368
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95652174 0.95652174 0.86956522 1. 0.90909091 0.90909091
|
|
1. 1. 0.95652174 1. ]
|
|
|
|
mean value: 0.9557312252964427
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.91666667 0.91666667 0.83333333 1. 1. 1.
|
|
1. 1. 0.91666667 1. ]
|
|
|
|
mean value: 0.9583333333333334
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.90909091 1. 0.83333333 0.83333333
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9575757575757575
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95833333 0.95833333 0.87121212 1. 0.91666667 0.91666667
|
|
1. 1. 0.95454545 1. ]
|
|
|
|
mean value: 0.9575757575757575
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.91666667 0.91666667 0.76923077 1. 0.83333333 0.83333333
|
|
1. 1. 0.91666667 1. ]
|
|
|
|
mean value: 0.9185897435897435
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.53
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03170872 0.05358624 0.06331277 0.06383777 0.07470703 0.05675173
|
|
0.0568521 0.05640078 0.05648899 0.05701303]
|
|
|
|
mean value: 0.057065916061401364
|
|
|
|
key: score_time
|
|
value: [0.02100611 0.0222826 0.02220082 0.02002335 0.02134395 0.02366328
|
|
0.02332687 0.01963353 0.02289724 0.02070689]
|
|
|
|
mean value: 0.021708464622497557
|
|
|
|
key: test_mcc
|
|
value: [0.76277007 0.56490196 0.58930667 0.48075018 0.6992059 0.82575758
|
|
0.56490196 0.58930667 0.91287093 0.56694671]
|
|
|
|
mean value: 0.6556718603789258
|
|
|
|
key: train_mcc
|
|
value: [0.92211753 0.9707786 0.94146202 0.92211753 0.94164684 0.93175328
|
|
0.95163291 0.95126594 0.91266437 0.94192516]
|
|
|
|
mean value: 0.938736418639331
|
|
|
|
key: test_accuracy
|
|
value: [0.86956522 0.7826087 0.7826087 0.73913043 0.82608696 0.91304348
|
|
0.7826087 0.7826087 0.95454545 0.77272727]
|
|
|
|
mean value: 0.8205533596837945
|
|
|
|
key: train_accuracy
|
|
value: [0.96097561 0.98536585 0.97073171 0.96097561 0.97073171 0.96585366
|
|
0.97560976 0.97560976 0.95631068 0.97087379]
|
|
|
|
mean value: 0.9693038124556003
|
|
|
|
key: test_fscore
|
|
value: [0.84210526 0.76190476 0.8 0.7 0.8 0.91666667
|
|
0.8 0.76190476 0.95238095 0.73684211]
|
|
|
|
mean value: 0.8071804511278196
|
|
|
|
key: train_fscore
|
|
value: [0.96153846 0.98536585 0.97087379 0.96153846 0.97087379 0.96585366
|
|
0.97584541 0.97560976 0.95652174 0.97115385]
|
|
|
|
mean value: 0.969517476009744
|
|
|
|
key: test_precision
|
|
value: [1. 0.8 0.71428571 0.77777778 1. 0.91666667
|
|
0.76923077 0.88888889 1. 0.875 ]
|
|
|
|
mean value: 0.8741849816849817
|
|
|
|
key: train_precision
|
|
value: [0.95238095 0.99019608 0.97087379 0.95238095 0.96153846 0.96116505
|
|
0.96190476 0.97087379 0.95192308 0.96190476]
|
|
|
|
mean value: 0.9635141666823563
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.72727273 0.90909091 0.63636364 0.66666667 0.91666667
|
|
0.83333333 0.66666667 0.90909091 0.63636364]
|
|
|
|
mean value: 0.7628787878787878
|
|
|
|
key: train_recall
|
|
value: [0.97087379 0.98058252 0.97087379 0.97087379 0.98039216 0.97058824
|
|
0.99019608 0.98039216 0.96116505 0.98058252]
|
|
|
|
mean value: 0.975652008376166
|
|
|
|
key: test_roc_auc
|
|
value: [0.86363636 0.78030303 0.78787879 0.73484848 0.83333333 0.91287879
|
|
0.78030303 0.78787879 0.95454545 0.77272727]
|
|
|
|
mean value: 0.8208333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.96092709 0.9853893 0.97073101 0.96092709 0.9707786 0.96587664
|
|
0.97568056 0.97563297 0.95631068 0.97087379]
|
|
|
|
mean value: 0.9693127736531506
|
|
|
|
key: test_jcc
|
|
value: [0.72727273 0.61538462 0.66666667 0.53846154 0.66666667 0.84615385
|
|
0.66666667 0.61538462 0.90909091 0.58333333]
|
|
|
|
mean value: 0.6835081585081585
|
|
|
|
key: train_jcc
|
|
value: [0.92592593 0.97115385 0.94339623 0.92592593 0.94339623 0.93396226
|
|
0.95283019 0.95238095 0.91666667 0.94392523]
|
|
|
|
mean value: 0.9409563456358554
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0219748 0.01021671 0.01002097 0.009799 0.00974965 0.00970197
|
|
0.00891066 0.00987864 0.00947976 0.00913072]
|
|
|
|
mean value: 0.010886287689208985
|
|
|
|
key: score_time
|
|
value: [0.01003003 0.00961876 0.00944829 0.00925136 0.00877619 0.00935459
|
|
0.00935292 0.00873876 0.00937414 0.00935054]
|
|
|
|
mean value: 0.009329557418823242
|
|
|
|
key: test_mcc
|
|
value: [0.47727273 0.39393939 0.39393939 0.38932432 0.30240737 0.56818182
|
|
0.56490196 0.02585438 0.09759001 0.09245003]
|
|
|
|
mean value: 0.3305861402074863
|
|
|
|
key: train_mcc
|
|
value: [0.41481375 0.48545031 0.46581391 0.4461775 0.40046964 0.42940367
|
|
0.47412116 0.45056913 0.44098577 0.50679276]
|
|
|
|
mean value: 0.4514597602525188
|
|
|
|
key: test_accuracy
|
|
value: [0.73913043 0.69565217 0.69565217 0.69565217 0.65217391 0.7826087
|
|
0.7826087 0.52173913 0.54545455 0.54545455]
|
|
|
|
mean value: 0.6656126482213438
|
|
|
|
key: train_accuracy
|
|
value: [0.70243902 0.74146341 0.73170732 0.72195122 0.69756098 0.71219512
|
|
0.73658537 0.72195122 0.7184466 0.75242718]
|
|
|
|
mean value: 0.7236727444944352
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.69565217 0.69565217 0.66666667 0.69230769 0.7826087
|
|
0.8 0.62068966 0.61538462 0.58333333]
|
|
|
|
mean value: 0.687956773361571
|
|
|
|
key: train_fscore
|
|
value: [0.73362445 0.75576037 0.74654378 0.73732719 0.71818182 0.73059361
|
|
0.74285714 0.74208145 0.73636364 0.7627907 ]
|
|
|
|
mean value: 0.7406124140900755
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.66666667 0.66666667 0.7 0.64285714 0.81818182
|
|
0.76923077 0.52941176 0.53333333 0.53846154]
|
|
|
|
mean value: 0.6592082427376545
|
|
|
|
key: train_precision
|
|
value: [0.66666667 0.71929825 0.71052632 0.70175439 0.66949153 0.68376068
|
|
0.72222222 0.68907563 0.69230769 0.73214286]
|
|
|
|
mean value: 0.6987246225144372
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.72727273 0.72727273 0.63636364 0.75 0.75
|
|
0.83333333 0.75 0.72727273 0.63636364]
|
|
|
|
mean value: 0.7265151515151516
|
|
|
|
key: train_recall
|
|
value: [0.81553398 0.7961165 0.78640777 0.77669903 0.7745098 0.78431373
|
|
0.76470588 0.80392157 0.78640777 0.7961165 ]
|
|
|
|
mean value: 0.7884732533790215
|
|
|
|
key: test_roc_auc
|
|
value: [0.73863636 0.6969697 0.6969697 0.69318182 0.64772727 0.78409091
|
|
0.78030303 0.51136364 0.54545455 0.54545455]
|
|
|
|
mean value: 0.6640151515151516
|
|
|
|
key: train_roc_auc
|
|
value: [0.70188464 0.74119551 0.73143918 0.72168285 0.69793451 0.71254521
|
|
0.73672187 0.72234913 0.7184466 0.75242718]
|
|
|
|
mean value: 0.7236626689510756
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.53333333 0.53333333 0.5 0.52941176 0.64285714
|
|
0.66666667 0.45 0.44444444 0.41176471]
|
|
|
|
mean value: 0.5283239962651727
|
|
|
|
key: train_jcc
|
|
value: [0.57931034 0.60740741 0.59558824 0.58394161 0.56028369 0.57553957
|
|
0.59090909 0.58992806 0.58273381 0.61654135]
|
|
|
|
mean value: 0.5882183164453261
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01108789 0.0161562 0.01473117 0.01652384 0.01481438 0.015769
|
|
0.01692629 0.01509166 0.01592302 0.01968622]
|
|
|
|
mean value: 0.015670967102050782
|
|
|
|
key: score_time
|
|
value: [0.00860429 0.01099896 0.01096487 0.01157355 0.01150799 0.01149702
|
|
0.011621 0.01159811 0.01157045 0.01161623]
|
|
|
|
mean value: 0.011155247688293457
|
|
|
|
key: test_mcc
|
|
value: [0.62050523 0.66414149 0.48856385 0.69084928 0.83971912 0.63327851
|
|
0.74047959 0.56490196 0.48795004 0.40824829]
|
|
|
|
mean value: 0.6138637350421967
|
|
|
|
key: train_mcc
|
|
value: [0.64013725 0.94146202 0.961154 0.79610703 0.88361919 0.72360351
|
|
0.88909823 0.82136935 0.71743005 0.88083033]
|
|
|
|
mean value: 0.8254810956558689
|
|
|
|
key: test_accuracy
|
|
value: [0.7826087 0.82608696 0.73913043 0.82608696 0.91304348 0.7826087
|
|
0.86956522 0.7826087 0.72727273 0.68181818]
|
|
|
|
mean value: 0.7930830039525691
|
|
|
|
key: train_accuracy
|
|
value: [0.7902439 0.97073171 0.9804878 0.88780488 0.94146341 0.84390244
|
|
0.94146341 0.90731707 0.83980583 0.9368932 ]
|
|
|
|
mean value: 0.9040113663272555
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.83333333 0.75 0.77777778 0.90909091 0.73684211
|
|
0.88 0.8 0.76923077 0.74074074]
|
|
|
|
mean value: 0.7902897988377864
|
|
|
|
key: train_fscore
|
|
value: [0.73619632 0.97087379 0.98076923 0.87431694 0.94230769 0.81395349
|
|
0.94444444 0.9124424 0.86192469 0.94063927]
|
|
|
|
mean value: 0.8977868253122568
|
|
|
|
key: test_precision
|
|
value: [1. 0.76923077 0.69230769 1. 1. 1.
|
|
0.84615385 0.76923077 0.66666667 0.625 ]
|
|
|
|
mean value: 0.8368589743589744
|
|
|
|
key: train_precision
|
|
value: [1. 0.97087379 0.97142857 1. 0.9245283 1.
|
|
0.89473684 0.86086957 0.75735294 0.88793103]
|
|
|
|
mean value: 0.9267721042705015
|
|
|
|
key: test_recall
|
|
value: [0.54545455 0.90909091 0.81818182 0.63636364 0.83333333 0.58333333
|
|
0.91666667 0.83333333 0.90909091 0.90909091]
|
|
|
|
mean value: 0.7893939393939394
|
|
|
|
key: train_recall
|
|
value: [0.58252427 0.97087379 0.99029126 0.77669903 0.96078431 0.68627451
|
|
1. 0.97058824 1. 1. ]
|
|
|
|
mean value: 0.8938035408338092
|
|
|
|
key: test_roc_auc
|
|
value: [0.77272727 0.82954545 0.74242424 0.81818182 0.91666667 0.79166667
|
|
0.86742424 0.78030303 0.72727273 0.68181818]
|
|
|
|
mean value: 0.7928030303030303
|
|
|
|
key: train_roc_auc
|
|
value: [0.79126214 0.97073101 0.98043975 0.88834951 0.94155721 0.84313725
|
|
0.94174757 0.90762421 0.83980583 0.9368932 ]
|
|
|
|
mean value: 0.904154768703598
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.71428571 0.6 0.63636364 0.83333333 0.58333333
|
|
0.78571429 0.66666667 0.625 0.58823529]
|
|
|
|
mean value: 0.6578386809269162
|
|
|
|
key: train_jcc
|
|
value: [0.58252427 0.94339623 0.96226415 0.77669903 0.89090909 0.68627451
|
|
0.89473684 0.83898305 0.75735294 0.88793103]
|
|
|
|
mean value: 0.8221071147654326
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01452732 0.01389766 0.01486945 0.0147233 0.01471424 0.01462889
|
|
0.0140667 0.01539159 0.01441121 0.01535964]
|
|
|
|
mean value: 0.0146589994430542
|
|
|
|
key: score_time
|
|
value: [0.01154995 0.0115397 0.01155138 0.01168466 0.01158977 0.01151705
|
|
0.01147985 0.01160812 0.01153541 0.01155424]
|
|
|
|
mean value: 0.011561012268066407
|
|
|
|
key: test_mcc
|
|
value: [0.40451992 0.66414149 0.33371191 0.76764947 0.6992059 0.74242424
|
|
0.65151515 0.83971912 0.68313005 0.40824829]
|
|
|
|
mean value: 0.6194265542300403
|
|
|
|
key: train_mcc
|
|
value: [0.4515346 0.84539215 0.89473501 0.72360351 0.73146795 0.88558308
|
|
0.7922197 0.91330072 0.81319759 0.69427256]
|
|
|
|
mean value: 0.774530686441988
|
|
|
|
key: test_accuracy
|
|
value: [0.65217391 0.82608696 0.65217391 0.86956522 0.82608696 0.86956522
|
|
0.82608696 0.91304348 0.81818182 0.68181818]
|
|
|
|
mean value: 0.7934782608695652
|
|
|
|
key: train_accuracy
|
|
value: [0.66829268 0.92195122 0.94634146 0.84390244 0.84878049 0.94146341
|
|
0.88780488 0.95609756 0.89805825 0.82524272]
|
|
|
|
mean value: 0.873793511721525
|
|
|
|
key: test_fscore
|
|
value: [0.42857143 0.83333333 0.69230769 0.88 0.8 0.86956522
|
|
0.83333333 0.90909091 0.84615385 0.58823529]
|
|
|
|
mean value: 0.7680591054299494
|
|
|
|
key: train_fscore
|
|
value: [0.50724638 0.92 0.94835681 0.86554622 0.82080925 0.93877551
|
|
0.87431694 0.9569378 0.90748899 0.78823529]
|
|
|
|
mean value: 0.8527713181405282
|
|
|
|
key: test_precision
|
|
value: [1. 0.76923077 0.6 0.78571429 1. 0.90909091
|
|
0.83333333 1. 0.73333333 0.83333333]
|
|
|
|
mean value: 0.8464035964035964
|
|
|
|
key: train_precision
|
|
value: [1. 0.94845361 0.91818182 0.76296296 1. 0.9787234
|
|
0.98765432 0.93457944 0.83064516 1. ]
|
|
|
|
mean value: 0.9361200715177836
|
|
|
|
key: test_recall
|
|
value: [0.27272727 0.90909091 0.81818182 1. 0.66666667 0.83333333
|
|
0.83333333 0.83333333 1. 0.45454545]
|
|
|
|
mean value: 0.7621212121212121
|
|
|
|
key: train_recall
|
|
value: [0.33980583 0.89320388 0.98058252 1. 0.69607843 0.90196078
|
|
0.78431373 0.98039216 1. 0.65048544]
|
|
|
|
mean value: 0.8226822767942128
|
|
|
|
key: test_roc_auc
|
|
value: [0.63636364 0.82954545 0.65909091 0.875 0.83333333 0.87121212
|
|
0.82575758 0.91666667 0.81818182 0.68181818]
|
|
|
|
mean value: 0.7946969696969697
|
|
|
|
key: train_roc_auc
|
|
value: [0.66990291 0.92209214 0.94617362 0.84313725 0.84803922 0.94127165
|
|
0.88730249 0.9562155 0.89805825 0.82524272]
|
|
|
|
mean value: 0.8737435750999429
|
|
|
|
key: test_jcc
|
|
value: [0.27272727 0.71428571 0.52941176 0.78571429 0.66666667 0.76923077
|
|
0.71428571 0.83333333 0.73333333 0.41666667]
|
|
|
|
mean value: 0.6435655520949639
|
|
|
|
key: train_jcc
|
|
value: [0.33980583 0.85185185 0.90178571 0.76296296 0.69607843 0.88461538
|
|
0.77669903 0.91743119 0.83064516 0.65048544]
|
|
|
|
mean value: 0.7612360990301472
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.13866186 0.11934733 0.11773157 0.11565089 0.11581945 0.117486
|
|
0.11805749 0.12116289 0.11758924 0.11728621]
|
|
|
|
mean value: 0.11987929344177246
|
|
|
|
key: score_time
|
|
value: [0.0161581 0.01547503 0.01618123 0.01490808 0.01621389 0.01644397
|
|
0.01605368 0.01637292 0.01620722 0.01623797]
|
|
|
|
mean value: 0.016025209426879884
|
|
|
|
key: test_mcc
|
|
value: [0.74047959 0.82575758 0.74242424 0.91605722 0.83971912 0.83971912
|
|
1. 0.66414149 0.91287093 0.81818182]
|
|
|
|
mean value: 0.8299351113957522
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.86956522 0.91304348 0.86956522 0.95652174 0.91304348 0.91304348
|
|
1. 0.82608696 0.95454545 0.90909091]
|
|
|
|
mean value: 0.9124505928853754
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.90909091 0.86956522 0.95238095 0.90909091 0.90909091
|
|
1. 0.81818182 0.95238095 0.90909091]
|
|
|
|
mean value: 0.9086015433841521
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9 0.90909091 0.83333333 1. 1. 1.
|
|
1. 0.9 1. 0.90909091]
|
|
|
|
mean value: 0.9451515151515152
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.90909091 0.90909091 0.90909091 0.83333333 0.83333333
|
|
1. 0.75 0.90909091 0.90909091]
|
|
|
|
mean value: 0.878030303030303
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86742424 0.91287879 0.87121212 0.95454545 0.91666667 0.91666667
|
|
1. 0.82954545 0.95454545 0.90909091]
|
|
|
|
mean value: 0.9132575757575758
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.83333333 0.76923077 0.90909091 0.83333333 0.83333333
|
|
1. 0.69230769 0.90909091 0.83333333]
|
|
|
|
mean value: 0.8363053613053613
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.01
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04288006 0.03579044 0.03872585 0.05015779 0.04801965 0.05010223
|
|
0.04300642 0.05224681 0.04316568 0.06040478]
|
|
|
|
mean value: 0.04644997119903564
|
|
|
|
key: score_time
|
|
value: [0.01788092 0.0261817 0.01968694 0.03280926 0.02911305 0.04006219
|
|
0.02331567 0.02224922 0.0289259 0.03248787]
|
|
|
|
mean value: 0.027271270751953125
|
|
|
|
key: test_mcc
|
|
value: [0.58002308 1. 0.65151515 1. 0.83971912 0.83971912
|
|
0.83971912 0.91666667 0.91287093 0.83205029]
|
|
|
|
mean value: 0.8412283485098235
|
|
|
|
key: train_mcc
|
|
value: [0.98067587 0.99029126 0.98067587 1. 0.99029034 1.
|
|
0.99029034 0.98067223 0.99033794 0.98076744]
|
|
|
|
mean value: 0.9884001294873583
|
|
|
|
key: test_accuracy
|
|
value: [0.7826087 1. 0.82608696 1. 0.91304348 0.91304348
|
|
0.91304348 0.95652174 0.95454545 0.90909091]
|
|
|
|
mean value: 0.916798418972332
|
|
|
|
key: train_accuracy
|
|
value: [0.9902439 0.99512195 0.9902439 1. 0.99512195 1.
|
|
0.99512195 0.9902439 0.99514563 0.99029126]
|
|
|
|
mean value: 0.9941534454179494
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 1. 0.81818182 1. 0.90909091 0.90909091
|
|
0.90909091 0.95652174 0.95238095 0.9 ]
|
|
|
|
mean value: 0.909119934222909
|
|
|
|
key: train_fscore
|
|
value: [0.99019608 0.99512195 0.99019608 1. 0.99507389 1.
|
|
0.99507389 0.99009901 0.99512195 0.99019608]
|
|
|
|
mean value: 0.9941078930885363
|
|
|
|
key: test_precision
|
|
value: [0.875 1. 0.81818182 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9693181818181819
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.63636364 1. 0.81818182 1. 0.83333333 0.83333333
|
|
0.83333333 0.91666667 0.90909091 0.81818182]
|
|
|
|
mean value: 0.8598484848484849
|
|
|
|
key: train_recall
|
|
value: [0.98058252 0.99029126 0.98058252 1. 0.99019608 1.
|
|
0.99019608 0.98039216 0.99029126 0.98058252]
|
|
|
|
mean value: 0.9883114410812869
|
|
|
|
key: test_roc_auc
|
|
value: [0.77651515 1. 0.82575758 1. 0.91666667 0.91666667
|
|
0.91666667 0.95833333 0.95454545 0.90909091]
|
|
|
|
mean value: 0.9174242424242425
|
|
|
|
key: train_roc_auc
|
|
value: [0.99029126 0.99514563 0.99029126 1. 0.99509804 1.
|
|
0.99509804 0.99019608 0.99514563 0.99029126]
|
|
|
|
mean value: 0.9941557205406435
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 1. 0.69230769 1. 0.83333333 0.83333333
|
|
0.83333333 0.91666667 0.90909091 0.81818182]
|
|
|
|
mean value: 0.8419580419580419
|
|
|
|
key: train_jcc
|
|
value: [0.98058252 0.99029126 0.98058252 1. 0.99019608 1.
|
|
0.99019608 0.98039216 0.99029126 0.98058252]
|
|
|
|
mean value: 0.9883114410812869
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02399015 0.02728724 0.03085113 0.06025457 0.07673955 0.06412292
|
|
0.06474566 0.06357622 0.0647521 0.06365848]
|
|
|
|
mean value: 0.053997802734375
|
|
|
|
key: score_time
|
|
value: [0.0126431 0.0125792 0.01255274 0.02063203 0.02428436 0.02345872
|
|
0.02092385 0.0228548 0.02434254 0.02310681]
|
|
|
|
mean value: 0.019737815856933592
|
|
|
|
key: test_mcc
|
|
value: [0.38932432 0.47727273 0.21452908 0.30240737 0.66414149 0.76764947
|
|
0.5164589 0.74047959 0.54232614 0.2773501 ]
|
|
|
|
mean value: 0.489193919560136
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.69565217 0.73913043 0.60869565 0.65217391 0.82608696 0.86956522
|
|
0.73913043 0.86956522 0.72727273 0.63636364]
|
|
|
|
mean value: 0.7363636363636363
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.72727273 0.52631579 0.6 0.81818182 0.85714286
|
|
0.7 0.88 0.625 0.6 ]
|
|
|
|
mean value: 0.7000579858737753
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.7 0.72727273 0.625 0.66666667 0.9 1.
|
|
0.875 0.84615385 1. 0.66666667]
|
|
|
|
mean value: 0.8006759906759907
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.72727273 0.45454545 0.54545455 0.75 0.75
|
|
0.58333333 0.91666667 0.45454545 0.54545455]
|
|
|
|
mean value: 0.6363636363636364
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.69318182 0.73863636 0.60227273 0.64772727 0.82954545 0.875
|
|
0.74621212 0.86742424 0.72727273 0.63636364]
|
|
|
|
mean value: 0.7363636363636363
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.57142857 0.35714286 0.42857143 0.69230769 0.75
|
|
0.53846154 0.78571429 0.45454545 0.42857143]
|
|
|
|
mean value: 0.5506743256743256
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.36353517 0.35397005 0.34888005 0.34964776 0.35780954 0.35491943
|
|
0.35081267 0.35545444 0.35240722 0.34948468]
|
|
|
|
mean value: 0.35369210243225097
|
|
|
|
key: score_time
|
|
value: [0.00919867 0.0091424 0.00909376 0.00912738 0.00930524 0.00908327
|
|
0.0090704 0.00978851 0.00921178 0.00905943]
|
|
|
|
mean value: 0.009208083152770996
|
|
|
|
key: test_mcc
|
|
value: [0.74047959 0.91666667 0.74242424 1. 0.76764947 1.
|
|
1. 0.91666667 0.91287093 1. ]
|
|
|
|
mean value: 0.8996757568581448
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.86956522 0.95652174 0.86956522 1. 0.86956522 1.
|
|
1. 0.95652174 0.95454545 1. ]
|
|
|
|
mean value: 0.9476284584980237
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.95652174 0.86956522 1. 0.85714286 1.
|
|
1. 0.95652174 0.95652174 1. ]
|
|
|
|
mean value: 0.9453416149068323
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9 0.91666667 0.83333333 1. 1. 1.
|
|
1. 1. 0.91666667 1. ]
|
|
|
|
mean value: 0.9566666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.81818182 1. 0.90909091 1. 0.75 1.
|
|
1. 0.91666667 1. 1. ]
|
|
|
|
mean value: 0.9393939393939394
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86742424 0.95833333 0.87121212 1. 0.875 1.
|
|
1. 0.95833333 0.95454545 1. ]
|
|
|
|
mean value: 0.9484848484848485
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.91666667 0.76923077 1. 0.75 1.
|
|
1. 0.91666667 0.91666667 1. ]
|
|
|
|
mean value: 0.9019230769230769
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02362514 0.0202353 0.0199132 0.02025294 0.0201211 0.02020693
|
|
0.02033591 0.02034235 0.01992464 0.02041364]
|
|
|
|
mean value: 0.02053711414337158
|
|
|
|
key: score_time
|
|
value: [0.01706982 0.01200247 0.01434779 0.01835537 0.017483 0.02034879
|
|
0.0230217 0.01707029 0.01792383 0.01838613]
|
|
|
|
mean value: 0.01760091781616211
|
|
|
|
key: test_mcc
|
|
value: [0.56879646 0.6992059 0.37080992 0.50460839 0.76277007 0.76277007
|
|
0.69084928 0.83743579 0.64715023 0.75592895]
|
|
|
|
mean value: 0.6600325061286204
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.73913043 0.82608696 0.60869565 0.69565217 0.86956522 0.86956522
|
|
0.82608696 0.91304348 0.81818182 0.86363636]
|
|
|
|
mean value: 0.8029644268774704
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.78571429 0.84615385 0.70967742 0.75862069 0.88888889 0.88888889
|
|
0.85714286 0.92307692 0.83333333 0.88 ]
|
|
|
|
mean value: 0.8371497132209035
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.64705882 0.73333333 0.55 0.61111111 0.8 0.8
|
|
0.75 0.85714286 0.76923077 0.78571429]
|
|
|
|
mean value: 0.7303591180061768
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 0.90909091 1. ]
|
|
|
|
mean value: 0.990909090909091
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.83333333 0.625 0.70833333 0.86363636 0.86363636
|
|
0.81818182 0.90909091 0.81818182 0.86363636]
|
|
|
|
mean value: 0.8053030303030303
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.64705882 0.73333333 0.55 0.61111111 0.8 0.8
|
|
0.75 0.85714286 0.71428571 0.78571429]
|
|
|
|
mean value: 0.7248646125116713
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02244925 0.04062223 0.03517199 0.03477359 0.05889964 0.01386142
|
|
0.01386476 0.01378965 0.0137496 0.03409195]
|
|
|
|
mean value: 0.028127408027648924
|
|
|
|
key: score_time
|
|
value: [0.02053857 0.02378178 0.02394128 0.02323008 0.01209402 0.01185203
|
|
0.01175451 0.01184201 0.01170516 0.02339268]
|
|
|
|
mean value: 0.01741321086883545
|
|
|
|
key: test_mcc
|
|
value: [0.82575758 0.66414149 0.47727273 0.65151515 0.76764947 0.91666667
|
|
0.74047959 0.82575758 0.81818182 0.36514837]
|
|
|
|
mean value: 0.7052570438471021
|
|
|
|
key: train_mcc
|
|
value: [0.90310636 0.90310636 0.91325992 0.92194936 0.93211467 0.92213232
|
|
0.93211467 0.86409538 0.91266437 0.92389898]
|
|
|
|
mean value: 0.9128442392047932
|
|
|
|
key: test_accuracy
|
|
value: [0.91304348 0.82608696 0.73913043 0.82608696 0.86956522 0.95652174
|
|
0.86956522 0.91304348 0.90909091 0.68181818]
|
|
|
|
mean value: 0.850395256916996
|
|
|
|
key: train_accuracy
|
|
value: [0.95121951 0.95121951 0.95609756 0.96097561 0.96585366 0.96097561
|
|
0.96585366 0.93170732 0.95631068 0.96116505]
|
|
|
|
mean value: 0.956137816717973
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.83333333 0.72727273 0.81818182 0.85714286 0.95652174
|
|
0.88 0.91666667 0.90909091 0.66666667]
|
|
|
|
mean value: 0.8473967626576322
|
|
|
|
key: train_fscore
|
|
value: [0.95238095 0.95238095 0.95734597 0.96116505 0.96618357 0.96116505
|
|
0.96618357 0.93269231 0.95652174 0.96226415]
|
|
|
|
mean value: 0.9568283320937857
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.76923077 0.72727273 0.81818182 1. 1.
|
|
0.84615385 0.91666667 0.90909091 0.7 ]
|
|
|
|
mean value: 0.8595687645687645
|
|
|
|
key: train_precision
|
|
value: [0.93457944 0.93457944 0.93518519 0.96116505 0.95238095 0.95192308
|
|
0.95238095 0.91509434 0.95192308 0.93577982]
|
|
|
|
mean value: 0.942499132697801
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.90909091 0.72727273 0.81818182 0.75 0.91666667
|
|
0.91666667 0.91666667 0.90909091 0.63636364]
|
|
|
|
mean value: 0.8409090909090909
|
|
|
|
key: train_recall
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_orig.py:155: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_orig.py:158: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.97087379 0.97087379 0.98058252 0.96116505 0.98039216 0.97058824
|
|
0.98039216 0.95098039 0.96116505 0.99029126]
|
|
|
|
mean value: 0.971730439748715
|
|
|
|
key: test_roc_auc
|
|
value: [0.91287879 0.82954545 0.73863636 0.82575758 0.875 0.95833333
|
|
0.86742424 0.91287879 0.90909091 0.68181818]
|
|
|
|
mean value: 0.8511363636363636
|
|
|
|
key: train_roc_auc
|
|
value: [0.95112317 0.95112317 0.95597754 0.96097468 0.96592423 0.96102227
|
|
0.96592423 0.93180088 0.95631068 0.96116505]
|
|
|
|
mean value: 0.9561345897582334
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.71428571 0.57142857 0.69230769 0.75 0.91666667
|
|
0.78571429 0.84615385 0.83333333 0.5 ]
|
|
|
|
mean value: 0.7443223443223443
|
|
|
|
key: train_jcc
|
|
value: [0.90909091 0.90909091 0.91818182 0.92523364 0.93457944 0.92523364
|
|
0.93457944 0.87387387 0.91666667 0.92727273]
|
|
|
|
mean value: 0.9173803072401203
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.23017311 0.2311604 0.22449851 0.22934937 0.22811341 0.22492957
|
|
0.3225956 0.27365017 0.24024272 0.22975492]
|
|
|
|
mean value: 0.24344677925109864
|
|
|
|
key: score_time
|
|
value: [0.02268362 0.0237155 0.02395248 0.02225494 0.02246737 0.02187586
|
|
0.02325249 0.02057958 0.02395296 0.02296972]
|
|
|
|
mean value: 0.022770452499389648
|
|
|
|
key: test_mcc
|
|
value: [0.76277007 0.56818182 0.47727273 0.65151515 0.76764947 0.82575758
|
|
0.74047959 0.82575758 0.83205029 0.36514837]
|
|
|
|
mean value: 0.6816582649537872
|
|
|
|
key: train_mcc
|
|
value: [0.91223227 0.92211753 0.91325992 0.92194936 0.93211467 0.94164684
|
|
0.93211467 0.86409538 0.92250402 0.92389898]
|
|
|
|
mean value: 0.9185933650622469
|
|
|
|
key: test_accuracy
|
|
value: [0.86956522 0.7826087 0.73913043 0.82608696 0.86956522 0.91304348
|
|
0.86956522 0.91304348 0.90909091 0.68181818]
|
|
|
|
mean value: 0.8373517786561264
|
|
|
|
key: train_accuracy
|
|
value: [0.95609756 0.96097561 0.95609756 0.96097561 0.96585366 0.97073171
|
|
0.96585366 0.93170732 0.96116505 0.96116505]
|
|
|
|
mean value: 0.9590622780014207
|
|
|
|
key: test_fscore
|
|
value: [0.84210526 0.7826087 0.72727273 0.81818182 0.85714286 0.91666667
|
|
0.88 0.91666667 0.9 0.66666667]
|
|
|
|
mean value: 0.8307311361407471
|
|
|
|
key: train_fscore
|
|
value: [0.95652174 0.96153846 0.95734597 0.96116505 0.96618357 0.97087379
|
|
0.96618357 0.93269231 0.96153846 0.96226415]
|
|
|
|
mean value: 0.9596307077116953
|
|
|
|
key: test_precision
|
|
value: [1. 0.75 0.72727273 0.81818182 1. 0.91666667
|
|
0.84615385 0.91666667 1. 0.7 ]
|
|
|
|
mean value: 0.8674941724941725
|
|
|
|
key: train_precision
|
|
value: [0.95192308 0.95238095 0.93518519 0.96116505 0.95238095 0.96153846
|
|
0.95238095 0.91509434 0.95238095 0.93577982]
|
|
|
|
mean value: 0.9470209737850626
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.81818182 0.72727273 0.81818182 0.75 0.91666667
|
|
0.91666667 0.91666667 0.81818182 0.63636364]
|
|
|
|
mean value: 0.8045454545454546
|
|
|
|
key: train_recall
|
|
value: [0.96116505 0.97087379 0.98058252 0.96116505 0.98039216 0.98039216
|
|
0.98039216 0.95098039 0.97087379 0.99029126]
|
|
|
|
mean value: 0.9727108319055777
|
|
|
|
key: test_roc_auc
|
|
value: [0.86363636 0.78409091 0.73863636 0.82575758 0.875 0.91287879
|
|
0.86742424 0.91287879 0.90909091 0.68181818]
|
|
|
|
mean value: 0.8371212121212122
|
|
|
|
key: train_roc_auc
|
|
value: [0.95607272 0.96092709 0.95597754 0.96097468 0.96592423 0.9707786
|
|
0.96592423 0.93180088 0.96116505 0.96116505]
|
|
|
|
mean value: 0.9590710070435942
|
|
|
|
key: test_jcc
|
|
value: [0.72727273 0.64285714 0.57142857 0.69230769 0.75 0.84615385
|
|
0.78571429 0.84615385 0.81818182 0.5 ]
|
|
|
|
mean value: 0.718006993006993
|
|
|
|
key: train_jcc
|
|
value: [0.91666667 0.92592593 0.91818182 0.92523364 0.93457944 0.94339623
|
|
0.93457944 0.87387387 0.92592593 0.92727273]
|
|
|
|
mean value: 0.9225635687626518
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02333498 0.02825093 0.02866554 0.02478814 0.03009248 0.02759218
|
|
0.02531862 0.02699327 0.02669168 0.0287993 ]
|
|
|
|
mean value: 0.027052712440490723
|
|
|
|
key: score_time
|
|
value: [0.00992012 0.01171041 0.01177144 0.01170754 0.01182556 0.01178455
|
|
0.01177168 0.01180267 0.01176238 0.01171088]
|
|
|
|
mean value: 0.0115767240524292
|
|
|
|
key: test_mcc
|
|
value: [ 0.37796447 0.49099025 0.74535599 0.57735027 0.28867513 0.42857143
|
|
0.8660254 0.17407766 -0.31622777 0.28867513]
|
|
|
|
mean value: 0.39214579792141185
|
|
|
|
key: train_mcc
|
|
value: [0.90550595 0.81271824 0.78163175 0.81289702 0.83066386 0.86200967
|
|
0.79775192 0.85947992 0.8603207 0.875 ]
|
|
|
|
mean value: 0.8397979034970979
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.73333333 0.85714286 0.78571429 0.64285714 0.71428571
|
|
0.92857143 0.57142857 0.35714286 0.64285714]
|
|
|
|
mean value: 0.69
|
|
|
|
key: train_accuracy
|
|
value: [0.95275591 0.90551181 0.890625 0.90625 0.9140625 0.9296875
|
|
0.8984375 0.9296875 0.9296875 0.9375 ]
|
|
|
|
mean value: 0.9194205216535433
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.71428571 0.875 0.76923077 0.61538462 0.71428571
|
|
0.92307692 0.66666667 0.18181818 0.61538462]
|
|
|
|
mean value: 0.6781015553074377
|
|
|
|
key: train_fscore
|
|
value: [0.953125 0.90769231 0.89230769 0.90769231 0.91729323 0.93233083
|
|
0.896 0.93023256 0.93129771 0.9375 ]
|
|
|
|
mean value: 0.9205471635905882
|
|
|
|
key: test_precision
|
|
value: [0.6 0.83333333 0.77777778 0.83333333 0.66666667 0.71428571
|
|
1. 0.54545455 0.25 0.66666667]
|
|
|
|
mean value: 0.6887518037518038
|
|
|
|
key: train_precision
|
|
value: [0.953125 0.88059701 0.87878788 0.89393939 0.88405797 0.89855072
|
|
0.91803279 0.92307692 0.91044776 0.9375 ]
|
|
|
|
mean value: 0.9078115454461019
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.625 1. 0.71428571 0.57142857 0.71428571
|
|
0.85714286 0.85714286 0.14285714 0.57142857]
|
|
|
|
mean value: 0.6910714285714286
|
|
|
|
key: train_recall
|
|
value: [0.953125 0.93650794 0.90625 0.921875 0.953125 0.96875
|
|
0.875 0.9375 0.953125 0.9375 ]
|
|
|
|
mean value: 0.9342757936507936
|
|
|
|
key: test_roc_auc
|
|
value: [0.67857143 0.74107143 0.85714286 0.78571429 0.64285714 0.71428571
|
|
0.92857143 0.57142857 0.35714286 0.64285714]
|
|
|
|
mean value: 0.6919642857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.95275298 0.90575397 0.890625 0.90625 0.9140625 0.9296875
|
|
0.8984375 0.9296875 0.9296875 0.9375 ]
|
|
|
|
mean value: 0.9194444444444444
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.55555556 0.77777778 0.625 0.44444444 0.55555556
|
|
0.85714286 0.5 0.1 0.44444444]
|
|
|
|
mean value: 0.540537518037518
|
|
|
|
key: train_jcc
|
|
value: [0.91044776 0.83098592 0.80555556 0.83098592 0.84722222 0.87323944
|
|
0.8115942 0.86956522 0.87142857 0.88235294]
|
|
|
|
mean value: 0.8533377739472339
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.85289168 0.70059133 0.73125625 0.85352159 0.73122883 0.70029855
|
|
0.80738592 0.64156055 0.63386559 0.79457211]
|
|
|
|
mean value: 0.7447172403335571
|
|
|
|
key: score_time
|
|
value: [0.01466966 0.01212597 0.01516724 0.01518154 0.01209664 0.01497483
|
|
0.01526499 0.01661897 0.01539016 0.01516986]
|
|
|
|
mean value: 0.014665985107421875
|
|
|
|
key: test_mcc
|
|
value: [ 0.21821789 0.33928571 0.57735027 0.8660254 0.42857143 0.57735027
|
|
0.74535599 0.42857143 -0.14285714 0.1490712 ]
|
|
|
|
mean value: 0.4186942451971027
|
|
|
|
key: train_mcc
|
|
value: [1. 0.93748452 0.90669283 1. 0.89073374 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9734911092040202
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.66666667 0.78571429 0.92857143 0.71428571 0.78571429
|
|
0.85714286 0.71428571 0.42857143 0.57142857]
|
|
|
|
mean value: 0.7052380952380952
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.96850394 0.953125 1. 0.9453125 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9866941437007875
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.66666667 0.8 0.92307692 0.71428571 0.76923077
|
|
0.83333333 0.71428571 0.42857143 0.5 ]
|
|
|
|
mean value: 0.6974450549450549
|
|
|
|
key: train_fscore
|
|
value: [1. 0.96875 0.95384615 1. 0.94573643 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9868332587954681
|
|
|
|
key: test_precision
|
|
value: [0.55555556 0.71428571 0.75 1. 0.71428571 0.83333333
|
|
1. 0.71428571 0.42857143 0.6 ]
|
|
|
|
mean value: 0.731031746031746
|
|
|
|
key: train_precision
|
|
value: [1. 0.95384615 0.93939394 1. 0.93846154 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9831701631701631
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.625 0.85714286 0.85714286 0.71428571 0.71428571
|
|
0.71428571 0.71428571 0.42857143 0.42857143]
|
|
|
|
mean value: 0.6767857142857143
|
|
|
|
key: train_recall
|
|
value: [1. 0.98412698 0.96875 1. 0.953125 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9906001984126984
|
|
|
|
key: test_roc_auc
|
|
value: [0.60714286 0.66964286 0.78571429 0.92857143 0.71428571 0.78571429
|
|
0.85714286 0.71428571 0.42857143 0.57142857]
|
|
|
|
mean value: 0.70625
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.96862599 0.953125 1. 0.9453125 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9867063492063493
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.5 0.66666667 0.85714286 0.55555556 0.625
|
|
0.71428571 0.55555556 0.27272727 0.33333333]
|
|
|
|
mean value: 0.553481240981241
|
|
|
|
key: train_jcc
|
|
value: [1. 0.93939394 0.91176471 1. 0.89705882 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9748217468805704
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01235104 0.01005244 0.01008081 0.00952315 0.00941539 0.00865269
|
|
0.00883651 0.00973344 0.00860357 0.00904226]
|
|
|
|
mean value: 0.009629130363464355
|
|
|
|
key: score_time
|
|
value: [0.01814413 0.00991583 0.00927973 0.00927973 0.0089798 0.00896645
|
|
0.00927234 0.00917888 0.00874782 0.00863695]
|
|
|
|
mean value: 0.01004016399383545
|
|
|
|
key: test_mcc
|
|
value: [ 0.26189246 0.18898224 0.17407766 0.40824829 0.17407766 0.31622777
|
|
0.1490712 0. -0.2773501 0.31622777]
|
|
|
|
mean value: 0.17114549346091681
|
|
|
|
key: train_mcc
|
|
value: [0.41221894 0.3438986 0.41858962 0.40451992 0.43084241 0.4031367
|
|
0.35377457 0.44649977 0.39637502 0.36808134]
|
|
|
|
mean value: 0.3977936882004
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.6 0.57142857 0.64285714 0.57142857 0.64285714
|
|
0.57142857 0.5 0.42857143 0.64285714]
|
|
|
|
mean value: 0.5771428571428572
|
|
|
|
key: train_accuracy
|
|
value: [0.67716535 0.62992126 0.6953125 0.640625 0.6953125 0.6796875
|
|
0.65625 0.6953125 0.6796875 0.6640625 ]
|
|
|
|
mean value: 0.6713336614173229
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.66666667 0.66666667 0.73684211 0.66666667 0.70588235
|
|
0.625 0.63157895 0.6 0.70588235]
|
|
|
|
mean value: 0.6671852425180598
|
|
|
|
key: train_fscore
|
|
value: [0.74534161 0.71856287 0.74172185 0.73563218 0.7483871 0.7388535
|
|
0.72151899 0.75471698 0.73548387 0.72611465]
|
|
|
|
mean value: 0.7366333616453036
|
|
|
|
key: test_precision
|
|
value: [0.54545455 0.6 0.54545455 0.58333333 0.54545455 0.6
|
|
0.55555556 0.5 0.46153846 0.6 ]
|
|
|
|
mean value: 0.5536790986790987
|
|
|
|
key: train_precision
|
|
value: [0.6185567 0.57692308 0.64367816 0.58181818 0.63736264 0.62365591
|
|
0.60638298 0.63157895 0.62637363 0.61290323]
|
|
|
|
mean value: 0.6159233450304762
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.75 0.85714286 1. 0.85714286 0.85714286
|
|
0.71428571 0.85714286 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8464285714285714
|
|
|
|
key: train_recall
|
|
value: [0.9375 0.95238095 0.875 1. 0.90625 0.90625
|
|
0.890625 0.9375 0.890625 0.890625 ]
|
|
|
|
mean value: 0.9186755952380953
|
|
|
|
key: test_roc_auc
|
|
value: [0.61607143 0.58928571 0.57142857 0.64285714 0.57142857 0.64285714
|
|
0.57142857 0.5 0.42857143 0.64285714]
|
|
|
|
mean value: 0.5776785714285715
|
|
|
|
key: train_roc_auc
|
|
value: [0.67509921 0.63244048 0.6953125 0.640625 0.6953125 0.6796875
|
|
0.65625 0.6953125 0.6796875 0.6640625 ]
|
|
|
|
mean value: 0.6713789682539683
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.5 0.5 0.58333333 0.5 0.54545455
|
|
0.45454545 0.46153846 0.42857143 0.54545455]
|
|
|
|
mean value: 0.5018897768897769
|
|
|
|
key: train_jcc
|
|
value: [0.59405941 0.56074766 0.58947368 0.58181818 0.59793814 0.58585859
|
|
0.56435644 0.60606061 0.58163265 0.57 ]
|
|
|
|
mean value: 0.5831945360474582
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00890517 0.00869513 0.00965166 0.00883913 0.00877833 0.00885248
|
|
0.00935841 0.00872326 0.00894189 0.00988293]
|
|
|
|
mean value: 0.009062838554382325
|
|
|
|
key: score_time
|
|
value: [0.00861073 0.00870466 0.00879693 0.0087924 0.00871682 0.00864601
|
|
0.0091064 0.00876236 0.0087378 0.00916338]
|
|
|
|
mean value: 0.008803749084472656
|
|
|
|
key: test_mcc
|
|
value: [ 0.18898224 0.49099025 0.4472136 -0.31622777 0.14285714 0.42857143
|
|
0. -0.1490712 -0.63245553 0.1490712 ]
|
|
|
|
mean value: 0.0749931358413612
|
|
|
|
key: train_mcc
|
|
value: [0.48209995 0.40158859 0.438357 0.42233925 0.42610928 0.40946151
|
|
0.43943537 0.50024432 0.53229065 0.438357 ]
|
|
|
|
mean value: 0.44902829325504817
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.73333333 0.71428571 0.35714286 0.57142857 0.71428571
|
|
0.5 0.42857143 0.21428571 0.57142857]
|
|
|
|
mean value: 0.5404761904761904
|
|
|
|
key: train_accuracy
|
|
value: [0.74015748 0.7007874 0.71875 0.7109375 0.7109375 0.703125
|
|
0.71875 0.75 0.765625 0.71875 ]
|
|
|
|
mean value: 0.7237819881889764
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.71428571 0.75 0.18181818 0.57142857 0.71428571
|
|
0.36363636 0.5 0. 0.5 ]
|
|
|
|
mean value: 0.47954545454545455
|
|
|
|
key: train_fscore
|
|
value: [0.73170732 0.69354839 0.70967742 0.704 0.68907563 0.68333333
|
|
0.70491803 0.75384615 0.75806452 0.70967742]
|
|
|
|
mean value: 0.7137848209227128
|
|
|
|
key: test_precision
|
|
value: [0.6 0.83333333 0.66666667 0.25 0.57142857 0.71428571
|
|
0.5 0.44444444 0. 0.6 ]
|
|
|
|
mean value: 0.518015873015873
|
|
|
|
key: train_precision
|
|
value: [0.76271186 0.70491803 0.73333333 0.72131148 0.74545455 0.73214286
|
|
0.74137931 0.74242424 0.78333333 0.73333333]
|
|
|
|
mean value: 0.7400342327969973
|
|
|
|
key: test_recall
|
|
value: [0.42857143 0.625 0.85714286 0.14285714 0.57142857 0.71428571
|
|
0.28571429 0.57142857 0. 0.42857143]
|
|
|
|
mean value: 0.46249999999999997
|
|
|
|
key: train_recall
|
|
value: [0.703125 0.68253968 0.6875 0.6875 0.640625 0.640625
|
|
0.671875 0.765625 0.734375 0.6875 ]
|
|
|
|
mean value: 0.6901289682539683
|
|
|
|
key: test_roc_auc
|
|
value: [0.58928571 0.74107143 0.71428571 0.35714286 0.57142857 0.71428571
|
|
0.5 0.42857143 0.21428571 0.57142857]
|
|
|
|
mean value: 0.5401785714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.74045139 0.70064484 0.71875 0.7109375 0.7109375 0.703125
|
|
0.71875 0.75 0.765625 0.71875 ]
|
|
|
|
mean value: 0.7237971230158731
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.55555556 0.6 0.1 0.4 0.55555556
|
|
0.22222222 0.33333333 0. 0.33333333]
|
|
|
|
mean value: 0.3433333333333333
|
|
|
|
key: train_jcc
|
|
value: [0.57692308 0.5308642 0.55 0.54320988 0.52564103 0.51898734
|
|
0.5443038 0.60493827 0.61038961 0.55 ]
|
|
|
|
mean value: 0.5555257197873231
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00843072 0.00828362 0.0094943 0.00941944 0.00952077 0.00883842
|
|
0.00867176 0.00860167 0.00916886 0.00823045]
|
|
|
|
mean value: 0.008866000175476074
|
|
|
|
key: score_time
|
|
value: [0.00952911 0.00987101 0.01023984 0.0103004 0.01070189 0.01650286
|
|
0.01474071 0.01097417 0.00975394 0.00930381]
|
|
|
|
mean value: 0.011191773414611816
|
|
|
|
key: test_mcc
|
|
value: [ 0.47245559 -0.37796447 -0.28867513 -0.1490712 0.14285714 0.
|
|
0. -0.28867513 -0.57735027 -0.28867513]
|
|
|
|
mean value: -0.13550986103646007
|
|
|
|
key: train_mcc
|
|
value: [0.41894709 0.29176205 0.36047677 0.29866683 0.39298268 0.438357
|
|
0.37518324 0.375 0.438357 0.3480246 ]
|
|
|
|
mean value: 0.3737757275384978
|
|
|
|
key: test_accuracy
|
|
value: [0.73333333 0.33333333 0.35714286 0.42857143 0.57142857 0.5
|
|
0.5 0.35714286 0.21428571 0.35714286]
|
|
|
|
mean value: 0.4352380952380952
|
|
|
|
key: train_accuracy
|
|
value: [0.70866142 0.64566929 0.6796875 0.6484375 0.6953125 0.71875
|
|
0.6875 0.6875 0.71875 0.671875 ]
|
|
|
|
mean value: 0.6862143208661418
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.44444444 0.4 0.5 0.57142857 0.46153846
|
|
0.46153846 0.30769231 0.26666667 0.30769231]
|
|
|
|
mean value: 0.43876678876678876
|
|
|
|
key: train_fscore
|
|
value: [0.69918699 0.62809917 0.66666667 0.62809917 0.67768595 0.70967742
|
|
0.69230769 0.6875 0.70967742 0.6440678 ]
|
|
|
|
mean value: 0.6742968283684786
|
|
|
|
key: test_precision
|
|
value: [0.8 0.4 0.375 0.44444444 0.57142857 0.5
|
|
0.5 0.33333333 0.25 0.33333333]
|
|
|
|
mean value: 0.45075396825396824
|
|
|
|
key: train_precision
|
|
value: [0.72881356 0.65517241 0.69491525 0.66666667 0.71929825 0.73333333
|
|
0.68181818 0.6875 0.73333333 0.7037037 ]
|
|
|
|
mean value: 0.700455469182168
|
|
|
|
key: test_recall
|
|
value: [0.57142857 0.5 0.42857143 0.57142857 0.57142857 0.42857143
|
|
0.42857143 0.28571429 0.28571429 0.28571429]
|
|
|
|
mean value: 0.4357142857142857
|
|
|
|
key: train_recall
|
|
value: [0.671875 0.6031746 0.640625 0.59375 0.640625 0.6875 0.703125
|
|
0.6875 0.6875 0.59375 ]
|
|
|
|
mean value: 0.6509424603174603
|
|
|
|
key: test_roc_auc
|
|
value: [0.72321429 0.32142857 0.35714286 0.42857143 0.57142857 0.5
|
|
0.5 0.35714286 0.21428571 0.35714286]
|
|
|
|
mean value: 0.4330357142857143
|
|
|
|
key: train_roc_auc
|
|
value: [0.70895337 0.6453373 0.6796875 0.6484375 0.6953125 0.71875
|
|
0.6875 0.6875 0.71875 0.671875 ]
|
|
|
|
mean value: 0.6862103174603175
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.28571429 0.25 0.33333333 0.4 0.3
|
|
0.3 0.18181818 0.15384615 0.18181818]
|
|
|
|
mean value: 0.28865301365301366
|
|
|
|
key: train_jcc
|
|
value: [0.5375 0.45783133 0.5 0.45783133 0.5125 0.55
|
|
0.52941176 0.52380952 0.55 0.475 ]
|
|
|
|
mean value: 0.5093883939117816
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01124406 0.01096201 0.01099586 0.00968695 0.00980282 0.00962329
|
|
0.00957036 0.01035261 0.00969028 0.00992322]
|
|
|
|
mean value: 0.010185146331787109
|
|
|
|
key: score_time
|
|
value: [0.00972486 0.00949264 0.00957704 0.00875735 0.00879574 0.00882769
|
|
0.0090363 0.00884104 0.00873518 0.00879645]
|
|
|
|
mean value: 0.009058427810668946
|
|
|
|
key: test_mcc
|
|
value: [ 0.33928571 0.09449112 0.4472136 0.71428571 0.14285714 0.42857143
|
|
0.31622777 0. -0.31622777 0. ]
|
|
|
|
mean value: 0.21667047137522646
|
|
|
|
key: train_mcc
|
|
value: [0.63789683 0.6852819 0.64070322 0.71910121 0.6253054 0.67195703
|
|
0.6253054 0.78125 0.72015793 0.59491308]
|
|
|
|
mean value: 0.6701871989258454
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.53333333 0.71428571 0.85714286 0.57142857 0.71428571
|
|
0.64285714 0.5 0.35714286 0.5 ]
|
|
|
|
mean value: 0.6057142857142858
|
|
|
|
key: train_accuracy
|
|
value: [0.81889764 0.84251969 0.8203125 0.859375 0.8125 0.8359375
|
|
0.8125 0.890625 0.859375 0.796875 ]
|
|
|
|
mean value: 0.8348917322834646
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.46153846 0.75 0.85714286 0.57142857 0.71428571
|
|
0.54545455 0.53333333 0.18181818 0.46153846]
|
|
|
|
mean value: 0.5743206793206793
|
|
|
|
key: train_fscore
|
|
value: [0.81889764 0.83870968 0.81889764 0.86153846 0.81538462 0.83464567
|
|
0.81538462 0.890625 0.86363636 0.79032258]
|
|
|
|
mean value: 0.8348042258890461
|
|
|
|
key: test_precision
|
|
value: [0.625 0.6 0.66666667 0.85714286 0.57142857 0.71428571
|
|
0.75 0.5 0.25 0.5 ]
|
|
|
|
mean value: 0.603452380952381
|
|
|
|
key: train_precision
|
|
value: [0.82539683 0.85245902 0.82539683 0.84848485 0.8030303 0.84126984
|
|
0.8030303 0.890625 0.83823529 0.81666667]
|
|
|
|
mean value: 0.8344594923786702
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.375 0.85714286 0.85714286 0.57142857 0.71428571
|
|
0.42857143 0.57142857 0.14285714 0.42857143]
|
|
|
|
mean value: 0.5660714285714286
|
|
|
|
key: train_recall
|
|
value: [0.8125 0.82539683 0.8125 0.875 0.828125 0.828125
|
|
0.828125 0.890625 0.890625 0.765625 ]
|
|
|
|
mean value: 0.8356646825396825
|
|
|
|
key: test_roc_auc
|
|
value: [0.66964286 0.54464286 0.71428571 0.85714286 0.57142857 0.71428571
|
|
0.64285714 0.5 0.35714286 0.5 ]
|
|
|
|
mean value: 0.6071428571428572
|
|
|
|
key: train_roc_auc
|
|
value: [0.81894841 0.84238591 0.8203125 0.859375 0.8125 0.8359375
|
|
0.8125 0.890625 0.859375 0.796875 ]
|
|
|
|
mean value: 0.8348834325396826
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.3 0.6 0.75 0.4 0.55555556
|
|
0.375 0.36363636 0.1 0.3 ]
|
|
|
|
mean value: 0.4244191919191919
|
|
|
|
key: train_jcc
|
|
value: [0.69333333 0.72222222 0.69333333 0.75675676 0.68831169 0.71621622
|
|
0.68831169 0.8028169 0.76 0.65333333]
|
|
|
|
mean value: 0.7174635473227022
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.72048855 0.58740497 0.71089745 0.57400823 0.57025862 0.57146072
|
|
0.68641615 0.55554175 0.5684855 0.64807653]
|
|
|
|
mean value: 0.619303846359253
|
|
|
|
key: score_time
|
|
value: [0.01467466 0.01231575 0.01452303 0.01452589 0.01463056 0.01481271
|
|
0.01819897 0.01205873 0.01488829 0.01502252]
|
|
|
|
mean value: 0.014565110206604004
|
|
|
|
key: test_mcc
|
|
value: [ 0.33928571 0.21821789 0.1490712 0.57735027 0.14285714 0.57735027
|
|
0.8660254 0. -0.28867513 0.28867513]
|
|
|
|
mean value: 0.28701578880425255
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.6 0.57142857 0.78571429 0.57142857 0.78571429
|
|
0.92857143 0.5 0.35714286 0.64285714]
|
|
|
|
mean value: 0.6409523809523809
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.57142857 0.625 0.8 0.57142857 0.8
|
|
0.93333333 0.53333333 0.30769231 0.61538462]
|
|
|
|
mean value: 0.6424267399267399
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.625 0.66666667 0.55555556 0.75 0.57142857 0.75
|
|
0.875 0.5 0.33333333 0.66666667]
|
|
|
|
mean value: 0.6293650793650793
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.5 0.71428571 0.85714286 0.57142857 0.85714286
|
|
1. 0.57142857 0.28571429 0.57142857]
|
|
|
|
mean value: 0.6642857142857143
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66964286 0.60714286 0.57142857 0.78571429 0.57142857 0.78571429
|
|
0.92857143 0.5 0.35714286 0.64285714]
|
|
|
|
mean value: 0.6419642857142858
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.4 0.45454545 0.66666667 0.4 0.66666667
|
|
0.875 0.36363636 0.18181818 0.44444444]
|
|
|
|
mean value: 0.49527777777777776
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02736211 0.01128983 0.01124406 0.01253748 0.01097155 0.01206899
|
|
0.01089215 0.01065326 0.01102686 0.01143122]
|
|
|
|
mean value: 0.012947750091552735
|
|
|
|
key: score_time
|
|
value: [0.01159692 0.00895143 0.00873518 0.00955987 0.00854182 0.00929856
|
|
0.00847149 0.00848365 0.00856256 0.00918412]
|
|
|
|
mean value: 0.00913856029510498
|
|
|
|
key: test_mcc
|
|
value: [0.33928571 0.875 1. 0.8660254 0.57735027 0.8660254
|
|
0.63245553 0. 1. 0.28867513]
|
|
|
|
mean value: 0.6444817457672706
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.93333333 1. 0.92857143 0.78571429 0.92857143
|
|
0.78571429 0.5 1. 0.64285714]
|
|
|
|
mean value: 0.8171428571428572
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.93333333 1. 0.92307692 0.76923077 0.93333333
|
|
0.72727273 0.53333333 1. 0.61538462]
|
|
|
|
mean value: 0.8101631701631702
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.625 1. 1. 1. 0.83333333 0.875
|
|
1. 0.5 1. 0.66666667]
|
|
|
|
mean value: 0.85
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.875 1. 0.85714286 0.71428571 1.
|
|
0.57142857 0.57142857 1. 0.57142857]
|
|
|
|
mean value: 0.7875
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66964286 0.9375 1. 0.92857143 0.78571429 0.92857143
|
|
0.78571429 0.5 1. 0.64285714]
|
|
|
|
mean value: 0.8178571428571428
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.875 1. 0.85714286 0.625 0.875
|
|
0.57142857 0.36363636 1. 0.44444444]
|
|
|
|
mean value: 0.7111652236652236
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08901286 0.09013486 0.08958101 0.08881617 0.08836842 0.08858609
|
|
0.08868885 0.08849192 0.08824515 0.08973527]
|
|
|
|
mean value: 0.08896605968475342
|
|
|
|
key: score_time
|
|
value: [0.01702809 0.01857686 0.01709199 0.01713276 0.01712823 0.01718926
|
|
0.01714444 0.01747155 0.01718402 0.01791286]
|
|
|
|
mean value: 0.01738600730895996
|
|
|
|
key: test_mcc
|
|
value: [ 0.19642857 0.07142857 0.74535599 0.57735027 0.42857143 0.57735027
|
|
0.4472136 -0.28867513 -0.4472136 0.1490712 ]
|
|
|
|
mean value: 0.24568811662129258
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.53333333 0.85714286 0.78571429 0.71428571 0.78571429
|
|
0.71428571 0.35714286 0.28571429 0.57142857]
|
|
|
|
mean value: 0.6204761904761905
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.53333333 0.83333333 0.8 0.71428571 0.8
|
|
0.66666667 0.4 0.16666667 0.5 ]
|
|
|
|
mean value: 0.5985714285714285
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.57142857 0.57142857 1. 0.75 0.71428571 0.75
|
|
0.8 0.375 0.2 0.6 ]
|
|
|
|
mean value: 0.6332142857142857
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.57142857 0.5 0.71428571 0.85714286 0.71428571 0.85714286
|
|
0.57142857 0.42857143 0.14285714 0.42857143]
|
|
|
|
mean value: 0.5785714285714285
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.59821429 0.53571429 0.85714286 0.78571429 0.71428571 0.78571429
|
|
0.71428571 0.35714286 0.28571429 0.57142857]
|
|
|
|
mean value: 0.6205357142857143
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.36363636 0.71428571 0.66666667 0.55555556 0.66666667
|
|
0.5 0.25 0.09090909 0.33333333]
|
|
|
|
mean value: 0.4541053391053391
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0096097 0.0091033 0.00874829 0.00875807 0.008811 0.00929141
|
|
0.00984359 0.00904584 0.00888586 0.00879669]
|
|
|
|
mean value: 0.009089374542236328
|
|
|
|
key: score_time
|
|
value: [0.00904441 0.00886655 0.00872993 0.00869799 0.0087254 0.0087328
|
|
0.00910592 0.00863886 0.00867748 0.00856519]
|
|
|
|
mean value: 0.00877845287322998
|
|
|
|
key: test_mcc
|
|
value: [ 0.33928571 0.07142857 0.57735027 0.42857143 0.57735027 0.1490712
|
|
0. -0.14285714 -0.42857143 0.1490712 ]
|
|
|
|
mean value: 0.17207000782363663
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.53333333 0.78571429 0.71428571 0.78571429 0.57142857
|
|
0.5 0.42857143 0.28571429 0.57142857]
|
|
|
|
mean value: 0.5842857142857143
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.53333333 0.8 0.71428571 0.8 0.625
|
|
0.36363636 0.42857143 0.28571429 0.5 ]
|
|
|
|
mean value: 0.5717207792207792
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.625 0.57142857 0.75 0.71428571 0.75 0.55555556
|
|
0.5 0.42857143 0.28571429 0.6 ]
|
|
|
|
mean value: 0.5780555555555555
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.5 0.85714286 0.71428571 0.85714286 0.71428571
|
|
0.28571429 0.42857143 0.28571429 0.42857143]
|
|
|
|
mean value: 0.5785714285714285
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66964286 0.53571429 0.78571429 0.71428571 0.78571429 0.57142857
|
|
0.5 0.42857143 0.28571429 0.57142857]
|
|
|
|
mean value: 0.5848214285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.36363636 0.66666667 0.55555556 0.66666667 0.45454545
|
|
0.22222222 0.27272727 0.16666667 0.33333333]
|
|
|
|
mean value: 0.4202020202020202
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.13835144 1.1283567 1.13199878 1.13944674 1.13446093 1.13452864
|
|
1.12859964 1.1291821 1.12874389 1.12446833]
|
|
|
|
mean value: 1.1318137168884277
|
|
|
|
key: score_time
|
|
value: [0.08793807 0.08876872 0.09104156 0.08774018 0.08761907 0.08778667
|
|
0.14704132 0.09132361 0.09439731 0.09718728]
|
|
|
|
mean value: 0.0960843801498413
|
|
|
|
key: test_mcc
|
|
value: [0.37796447 0.76376262 0.8660254 0.8660254 0.8660254 0.74535599
|
|
0.74535599 0. 0.4472136 0.42857143]
|
|
|
|
mean value: 0.6106300309259763
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.86666667 0.92857143 0.92857143 0.92857143 0.85714286
|
|
0.85714286 0.5 0.71428571 0.71428571]
|
|
|
|
mean value: 0.7961904761904762
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.85714286 0.92307692 0.92307692 0.92307692 0.875
|
|
0.83333333 0.53333333 0.66666667 0.71428571]
|
|
|
|
mean value: 0.795487502693385
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.6 1. 1. 1. 1. 0.77777778
|
|
1. 0.5 0.8 0.71428571]
|
|
|
|
mean value: 0.8392063492063492
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.75 0.85714286 0.85714286 0.85714286 1.
|
|
0.71428571 0.57142857 0.57142857 0.71428571]
|
|
|
|
mean value: 0.775
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.67857143 0.875 0.92857143 0.92857143 0.92857143 0.85714286
|
|
0.85714286 0.5 0.71428571 0.71428571]
|
|
|
|
mean value: 0.7982142857142858
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.75 0.85714286 0.85714286 0.85714286 0.77777778
|
|
0.71428571 0.36363636 0.5 0.55555556]
|
|
|
|
mean value: 0.6778138528138528
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.83326268 0.87439442 0.91672993 0.89082122 0.88721848 0.86894846
|
|
0.90624857 0.85243893 0.90239453 0.89272261]
|
|
|
|
mean value: 0.8825179815292359
|
|
|
|
key: score_time
|
|
value: [0.22324824 0.22895718 0.18167663 0.22090197 0.22210288 0.22428799
|
|
0.13536072 0.24140263 0.21208525 0.23036623]
|
|
|
|
mean value: 0.21203896999359131
|
|
|
|
key: test_mcc
|
|
value: [0.37796447 0.60714286 0.8660254 1. 0.71428571 0.74535599
|
|
0.63245553 0.1490712 0.31622777 0.42857143]
|
|
|
|
mean value: 0.5837100365844096
|
|
|
|
key: train_mcc
|
|
value: [0.93745372 0.93889821 0.93933644 0.93933644 0.90802522 0.95417386
|
|
0.92288947 0.95417386 0.93933644 0.9379581 ]
|
|
|
|
mean value: 0.9371581765131688
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.8 0.92857143 1. 0.85714286 0.85714286
|
|
0.78571429 0.57142857 0.64285714 0.71428571]
|
|
|
|
mean value: 0.7823809523809524
|
|
|
|
key: train_accuracy
|
|
value: [0.96850394 0.96850394 0.96875 0.96875 0.953125 0.9765625
|
|
0.9609375 0.9765625 0.96875 0.96875 ]
|
|
|
|
mean value: 0.9679195374015748
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.8 0.93333333 1. 0.85714286 0.875
|
|
0.72727273 0.625 0.54545455 0.71428571]
|
|
|
|
mean value: 0.7783371530430354
|
|
|
|
key: train_fscore
|
|
value: [0.96923077 0.96923077 0.96969697 0.96969697 0.95454545 0.97709924
|
|
0.96183206 0.97709924 0.96969697 0.96923077]
|
|
|
|
mean value: 0.9687359205679816
|
|
|
|
key: test_precision
|
|
value: [0.6 0.85714286 0.875 1. 0.85714286 0.77777778
|
|
1. 0.55555556 0.75 0.71428571]
|
|
|
|
mean value: 0.7986904761904762
|
|
|
|
key: train_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.95454545 0.94029851 0.94117647 0.94117647 0.92647059 0.95522388
|
|
0.94029851 0.95522388 0.94117647 0.95454545]
|
|
|
|
mean value: 0.9450135685210312
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.75 1. 1. 0.85714286 1.
|
|
0.57142857 0.71428571 0.42857143 0.71428571]
|
|
|
|
mean value: 0.7892857142857143
|
|
|
|
key: train_recall
|
|
value: [0.984375 1. 1. 1. 0.984375 1. 0.984375 1.
|
|
1. 0.984375]
|
|
|
|
mean value: 0.99375
|
|
|
|
key: test_roc_auc
|
|
value: [0.67857143 0.80357143 0.92857143 1. 0.85714286 0.85714286
|
|
0.78571429 0.57142857 0.64285714 0.71428571]
|
|
|
|
mean value: 0.7839285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.96837798 0.96875 0.96875 0.96875 0.953125 0.9765625
|
|
0.9609375 0.9765625 0.96875 0.96875 ]
|
|
|
|
mean value: 0.9679315476190476
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.66666667 0.875 1. 0.75 0.77777778
|
|
0.57142857 0.45454545 0.375 0.55555556]
|
|
|
|
mean value: 0.6571428571428571
|
|
|
|
key: train_jcc
|
|
value: [0.94029851 0.94029851 0.94117647 0.94117647 0.91304348 0.95522388
|
|
0.92647059 0.95522388 0.94117647 0.94029851]
|
|
|
|
mean value: 0.9394386761842959
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02314949 0.00937533 0.0098002 0.00965309 0.0087173 0.00990558
|
|
0.00964594 0.00971007 0.00986576 0.00981593]
|
|
|
|
mean value: 0.010963869094848634
|
|
|
|
key: score_time
|
|
value: [0.01428938 0.00957179 0.00965691 0.00903916 0.00860858 0.00936079
|
|
0.00952983 0.00949478 0.00943947 0.00927114]
|
|
|
|
mean value: 0.009826183319091797
|
|
|
|
key: test_mcc
|
|
value: [ 0.18898224 0.49099025 0.4472136 -0.31622777 0.14285714 0.42857143
|
|
0. -0.1490712 -0.63245553 0.1490712 ]
|
|
|
|
mean value: 0.0749931358413612
|
|
|
|
key: train_mcc
|
|
value: [0.48209995 0.40158859 0.438357 0.42233925 0.42610928 0.40946151
|
|
0.43943537 0.50024432 0.53229065 0.438357 ]
|
|
|
|
mean value: 0.44902829325504817
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.73333333 0.71428571 0.35714286 0.57142857 0.71428571
|
|
0.5 0.42857143 0.21428571 0.57142857]
|
|
|
|
mean value: 0.5404761904761904
|
|
|
|
key: train_accuracy
|
|
value: [0.74015748 0.7007874 0.71875 0.7109375 0.7109375 0.703125
|
|
0.71875 0.75 0.765625 0.71875 ]
|
|
|
|
mean value: 0.7237819881889764
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.71428571 0.75 0.18181818 0.57142857 0.71428571
|
|
0.36363636 0.5 0. 0.5 ]
|
|
|
|
mean value: 0.47954545454545455
|
|
|
|
key: train_fscore
|
|
value: [0.73170732 0.69354839 0.70967742 0.704 0.68907563 0.68333333
|
|
0.70491803 0.75384615 0.75806452 0.70967742]
|
|
|
|
mean value: 0.7137848209227128
|
|
|
|
key: test_precision
|
|
value: [0.6 0.83333333 0.66666667 0.25 0.57142857 0.71428571
|
|
0.5 0.44444444 0. 0.6 ]
|
|
|
|
mean value: 0.518015873015873
|
|
|
|
key: train_precision
|
|
value: [0.76271186 0.70491803 0.73333333 0.72131148 0.74545455 0.73214286
|
|
0.74137931 0.74242424 0.78333333 0.73333333]
|
|
|
|
mean value: 0.7400342327969973
|
|
|
|
key: test_recall
|
|
value: [0.42857143 0.625 0.85714286 0.14285714 0.57142857 0.71428571
|
|
0.28571429 0.57142857 0. 0.42857143]
|
|
|
|
mean value: 0.46249999999999997
|
|
|
|
key: train_recall
|
|
value: [0.703125 0.68253968 0.6875 0.6875 0.640625 0.640625
|
|
0.671875 0.765625 0.734375 0.6875 ]
|
|
|
|
mean value: 0.6901289682539683
|
|
|
|
key: test_roc_auc
|
|
value: [0.58928571 0.74107143 0.71428571 0.35714286 0.57142857 0.71428571
|
|
0.5 0.42857143 0.21428571 0.57142857]
|
|
|
|
mean value: 0.5401785714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.74045139 0.70064484 0.71875 0.7109375 0.7109375 0.703125
|
|
0.71875 0.75 0.765625 0.71875 ]
|
|
|
|
mean value: 0.7237971230158731
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.55555556 0.6 0.1 0.4 0.55555556
|
|
0.22222222 0.33333333 0. 0.33333333]
|
|
|
|
mean value: 0.3433333333333333
|
|
|
|
key: train_jcc
|
|
value: [0.57692308 0.5308642 0.55 0.54320988 0.52564103 0.51898734
|
|
0.5443038 0.60493827 0.61038961 0.55 ]
|
|
|
|
mean value: 0.5555257197873231
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.06581926 0.05272436 0.0533154 0.04687142 0.04571605 0.04804492
|
|
0.05116105 0.04486775 0.04888487 0.05014682]
|
|
|
|
mean value: 0.0507551908493042
|
|
|
|
key: score_time
|
|
value: [0.01139712 0.0113914 0.01027155 0.01038671 0.0105803 0.01110101
|
|
0.0105691 0.0107646 0.01118851 0.01136994]
|
|
|
|
mean value: 0.010902023315429688
|
|
|
|
key: test_mcc
|
|
value: [0.66143783 0.87287156 1. 0.8660254 0.71428571 0.71428571
|
|
0.74535599 0.1490712 0.8660254 0.42857143]
|
|
|
|
mean value: 0.7017930244421767
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.93333333 1. 0.92857143 0.85714286 0.85714286
|
|
0.85714286 0.57142857 0.92857143 0.71428571]
|
|
|
|
mean value: 0.8447619047619047
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.94117647 1. 0.92307692 0.85714286 0.85714286
|
|
0.83333333 0.625 0.93333333 0.71428571]
|
|
|
|
mean value: 0.850802090066796
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.7 0.88888889 1. 1. 0.85714286 0.85714286
|
|
1. 0.55555556 0.875 0.71428571]
|
|
|
|
mean value: 0.8448015873015873
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.85714286 0.85714286 0.85714286
|
|
0.71428571 0.71428571 1. 0.71428571]
|
|
|
|
mean value: 0.8714285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.92857143 1. 0.92857143 0.85714286 0.85714286
|
|
0.85714286 0.57142857 0.92857143 0.71428571]
|
|
|
|
mean value: 0.8455357142857143
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.88888889 1. 0.85714286 0.75 0.75
|
|
0.71428571 0.45454545 0.875 0.55555556]
|
|
|
|
mean value: 0.754541847041847
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02718544 0.04948449 0.05275106 0.04704428 0.05495071 0.05065632
|
|
0.04379416 0.04743075 0.04433584 0.05247545]
|
|
|
|
mean value: 0.04701085090637207
|
|
|
|
key: score_time
|
|
value: [0.02026653 0.02336526 0.01186585 0.02184844 0.01182151 0.02222586
|
|
0.02072549 0.02242494 0.02011228 0.02445412]
|
|
|
|
mean value: 0.019911026954650878
|
|
|
|
key: test_mcc
|
|
value: [-0.04029115 0.09449112 0.57735027 0.42857143 0. 0.28867513
|
|
0. 0. 0.1490712 0. ]
|
|
|
|
mean value: 0.1497868000906891
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 0.96922337
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9969223369195119
|
|
|
|
key: test_accuracy
|
|
value: [0.46666667 0.53333333 0.78571429 0.71428571 0.5 0.64285714
|
|
0.5 0.5 0.57142857 0.5 ]
|
|
|
|
mean value: 0.5714285714285714
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 0.984375 1. 1.
|
|
1. 1. ]
|
|
|
|
mean value: 0.9984375
|
|
|
|
key: test_fscore
|
|
value: [0.55555556 0.46153846 0.76923077 0.71428571 0.36363636 0.61538462
|
|
0.36363636 0.53333333 0.625 0.46153846]
|
|
|
|
mean value: 0.5463139638139638
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 0.98461538
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9984615384615385
|
|
|
|
key: test_precision
|
|
value: [0.45454545 0.6 0.83333333 0.71428571 0.5 0.66666667
|
|
0.5 0.5 0.55555556 0.5 ]
|
|
|
|
mean value: 0.5824386724386724
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 0.96969697
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.996969696969697
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.375 0.71428571 0.71428571 0.28571429 0.57142857
|
|
0.28571429 0.57142857 0.71428571 0.42857143]
|
|
|
|
mean value: 0.5375
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.48214286 0.54464286 0.78571429 0.71428571 0.5 0.64285714
|
|
0.5 0.5 0.57142857 0.5 ]
|
|
|
|
mean value: 0.5741071428571428
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 0.984375 1. 1.
|
|
1. 1. ]
|
|
|
|
mean value: 0.9984375
|
|
|
|
key: test_jcc
|
|
value: [0.38461538 0.3 0.625 0.55555556 0.22222222 0.44444444
|
|
0.22222222 0.36363636 0.45454545 0.3 ]
|
|
|
|
mean value: 0.3872241647241647
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 0.96969697
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.996969696969697
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0241859 0.00881934 0.00854516 0.00849938 0.00865054 0.00857329
|
|
0.00858045 0.00881457 0.00852299 0.00857997]
|
|
|
|
mean value: 0.010177159309387207
|
|
|
|
key: score_time
|
|
value: [0.00976634 0.00857759 0.00845194 0.00852895 0.00844574 0.00858092
|
|
0.00846004 0.00851583 0.0085721 0.00858021]
|
|
|
|
mean value: 0.008647966384887695
|
|
|
|
key: test_mcc
|
|
value: [ 0.21821789 0.32732684 0.17407766 0.71428571 -0.1490712 0.57735027
|
|
0.1490712 0.4472136 0.1490712 0.1490712 ]
|
|
|
|
mean value: 0.2756614357520949
|
|
|
|
key: train_mcc
|
|
value: [0.38660962 0.3754942 0.42824786 0.39298268 0.42442129 0.39298268
|
|
0.38177086 0.34442336 0.44095855 0.43943537]
|
|
|
|
mean value: 0.40073264569464856
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.66666667 0.57142857 0.85714286 0.42857143 0.78571429
|
|
0.57142857 0.71428571 0.57142857 0.57142857]
|
|
|
|
mean value: 0.6338095238095238
|
|
|
|
key: train_accuracy
|
|
value: [0.69291339 0.68503937 0.7109375 0.6953125 0.7109375 0.6953125
|
|
0.6875 0.671875 0.71875 0.71875 ]
|
|
|
|
mean value: 0.6987327755905511
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.70588235 0.66666667 0.85714286 0.5 0.8
|
|
0.5 0.75 0.5 0.625 ]
|
|
|
|
mean value: 0.6529691876750701
|
|
|
|
key: train_fscore
|
|
value: [0.70676692 0.70588235 0.73381295 0.71111111 0.72592593 0.71111111
|
|
0.71428571 0.68181818 0.73529412 0.73134328]
|
|
|
|
mean value: 0.715735166535589
|
|
|
|
key: test_precision
|
|
value: [0.55555556 0.66666667 0.54545455 0.85714286 0.44444444 0.75
|
|
0.6 0.66666667 0.6 0.55555556]
|
|
|
|
mean value: 0.6241486291486291
|
|
|
|
key: train_precision
|
|
value: [0.68115942 0.65753425 0.68 0.67605634 0.69014085 0.67605634
|
|
0.65789474 0.66176471 0.69444444 0.7 ]
|
|
|
|
mean value: 0.6775051075160861
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.75 0.85714286 0.85714286 0.57142857 0.85714286
|
|
0.42857143 0.85714286 0.42857143 0.71428571]
|
|
|
|
mean value: 0.7035714285714285
|
|
|
|
key: train_recall
|
|
value: [0.734375 0.76190476 0.796875 0.75 0.765625 0.75
|
|
0.78125 0.703125 0.78125 0.765625 ]
|
|
|
|
mean value: 0.7590029761904762
|
|
|
|
key: test_roc_auc
|
|
value: [0.60714286 0.66071429 0.57142857 0.85714286 0.42857143 0.78571429
|
|
0.57142857 0.71428571 0.57142857 0.57142857]
|
|
|
|
mean value: 0.6339285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.69258433 0.68563988 0.7109375 0.6953125 0.7109375 0.6953125
|
|
0.6875 0.671875 0.71875 0.71875 ]
|
|
|
|
mean value: 0.6987599206349207
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.54545455 0.5 0.75 0.33333333 0.66666667
|
|
0.33333333 0.6 0.33333333 0.45454545]
|
|
|
|
mean value: 0.4971212121212121
|
|
|
|
key: train_jcc
|
|
value: [0.54651163 0.54545455 0.57954545 0.55172414 0.56976744 0.55172414
|
|
0.55555556 0.51724138 0.58139535 0.57647059]
|
|
|
|
mean value: 0.5575390217567915
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01038313 0.01427412 0.01598454 0.01353955 0.01545024 0.01576066
|
|
0.01404667 0.01471567 0.01347804 0.01635647]
|
|
|
|
mean value: 0.014398908615112305
|
|
|
|
key: score_time
|
|
value: [0.00853682 0.01141334 0.01140761 0.01149416 0.01144385 0.01149607
|
|
0.01146412 0.01140809 0.01136732 0.01147699]
|
|
|
|
mean value: 0.011150836944580078
|
|
|
|
key: test_mcc
|
|
value: [0.46428571 0.56407607 0.71428571 0.71428571 0.42857143 0.74535599
|
|
0.57735027 0.57735027 0.17407766 0.28867513]
|
|
|
|
mean value: 0.5248313967676029
|
|
|
|
key: train_mcc
|
|
value: [0.86101708 0.72678367 0.82717019 0.80168466 0.85042006 0.84063468
|
|
0.78756153 0.90625 0.77459667 0.93933644]
|
|
|
|
mean value: 0.8315454985026968
|
|
|
|
key: test_accuracy
|
|
value: [0.73333333 0.73333333 0.85714286 0.85714286 0.71428571 0.85714286
|
|
0.78571429 0.78571429 0.57142857 0.64285714]
|
|
|
|
mean value: 0.7538095238095238
|
|
|
|
key: train_accuracy
|
|
value: [0.92913386 0.8503937 0.90625 0.8984375 0.921875 0.9140625
|
|
0.8828125 0.953125 0.875 0.96875 ]
|
|
|
|
mean value: 0.9099840059055118
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.66666667 0.85714286 0.85714286 0.71428571 0.83333333
|
|
0.8 0.8 0.66666667 0.61538462]
|
|
|
|
mean value: 0.7524908424908424
|
|
|
|
key: train_fscore
|
|
value: [0.92682927 0.82568807 0.89655172 0.9037037 0.92647059 0.90598291
|
|
0.8951049 0.953125 0.88888889 0.96969697]
|
|
|
|
mean value: 0.9092042017437767
|
|
|
|
key: test_precision
|
|
value: [0.71428571 1. 0.85714286 0.85714286 0.71428571 1.
|
|
0.75 0.75 0.54545455 0.66666667]
|
|
|
|
mean value: 0.7854978354978355
|
|
|
|
key: train_precision
|
|
value: [0.96610169 0.97826087 1. 0.85915493 0.875 1.
|
|
0.81012658 0.953125 0.8 0.94117647]
|
|
|
|
mean value: 0.9182945546924652
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.5 0.85714286 0.85714286 0.71428571 0.71428571
|
|
0.85714286 0.85714286 0.85714286 0.57142857]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_recall
|
|
value: [0.890625 0.71428571 0.8125 0.953125 0.984375 0.828125
|
|
1. 0.953125 1. 1. ]
|
|
|
|
mean value: 0.9136160714285715
|
|
|
|
key: test_roc_auc
|
|
value: [0.73214286 0.75 0.85714286 0.85714286 0.71428571 0.85714286
|
|
0.78571429 0.78571429 0.57142857 0.64285714]
|
|
|
|
mean value: 0.7553571428571428
|
|
|
|
key: train_roc_auc
|
|
value: [0.92943948 0.84933036 0.90625 0.8984375 0.921875 0.9140625
|
|
0.8828125 0.953125 0.875 0.96875 ]
|
|
|
|
mean value: 0.9099082341269842
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.5 0.75 0.75 0.55555556 0.71428571
|
|
0.66666667 0.66666667 0.5 0.44444444]
|
|
|
|
mean value: 0.6103174603174604
|
|
|
|
key: train_jcc
|
|
value: [0.86363636 0.703125 0.8125 0.82432432 0.8630137 0.828125
|
|
0.81012658 0.91044776 0.8 0.94117647]
|
|
|
|
mean value: 0.8356475200651571
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01415157 0.01356316 0.01522613 0.01370096 0.01369357 0.01361775
|
|
0.01383018 0.0135622 0.01277542 0.01345587]
|
|
|
|
mean value: 0.013757681846618653
|
|
|
|
key: score_time
|
|
value: [0.0115037 0.01245689 0.01166725 0.01139021 0.01141787 0.01142097
|
|
0.01140141 0.01140237 0.01158595 0.02333975]
|
|
|
|
mean value: 0.012758636474609375
|
|
|
|
key: test_mcc
|
|
value: [0.37796447 0. 0.74535599 0.8660254 0.28867513 0.57735027
|
|
0.63245553 0.17407766 0. 0.1490712 ]
|
|
|
|
mean value: 0.38109756595673944
|
|
|
|
key: train_mcc
|
|
value: [0.89071137 0.35476806 0.64978629 0.83643673 0.85042006 0.87542756
|
|
0.85947992 0.45557345 0.50487816 0.90669283]
|
|
|
|
mean value: 0.7184174438156392
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.46666667 0.85714286 0.92857143 0.64285714 0.78571429
|
|
0.78571429 0.57142857 0.5 0.57142857]
|
|
|
|
mean value: 0.6776190476190476
|
|
|
|
key: train_accuracy
|
|
value: [0.94488189 0.61417323 0.796875 0.9140625 0.921875 0.9375
|
|
0.9296875 0.671875 0.703125 0.953125 ]
|
|
|
|
mean value: 0.8387180118110236
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0. 0.83333333 0.92307692 0.61538462 0.8
|
|
0.72727273 0.4 0.63157895 0.5 ]
|
|
|
|
mean value: 0.6136528899377196
|
|
|
|
key: train_fscore
|
|
value: [0.94656489 0.36363636 0.74509804 0.90756303 0.91666667 0.93846154
|
|
0.92913386 0.51162791 0.77108434 0.95238095]
|
|
|
|
mean value: 0.7982217573661332
|
|
|
|
key: test_precision
|
|
value: [0.6 0. 1. 1. 0.66666667 0.75
|
|
1. 0.66666667 0.5 0.6 ]
|
|
|
|
mean value: 0.6783333333333333
|
|
|
|
key: train_precision
|
|
value: [0.92537313 1. 1. 0.98181818 0.98214286 0.92424242
|
|
0.93650794 1. 0.62745098 0.96774194]
|
|
|
|
mean value: 0.9345277449915785
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0. 0.71428571 0.85714286 0.57142857 0.85714286
|
|
0.57142857 0.28571429 0.85714286 0.42857143]
|
|
|
|
mean value: 0.6
|
|
|
|
key: train_recall
|
|
value: [0.96875 0.22222222 0.59375 0.84375 0.859375 0.953125
|
|
0.921875 0.34375 1. 0.9375 ]
|
|
|
|
mean value: 0.7644097222222223
|
|
|
|
key: test_roc_auc
|
|
value: [0.67857143 0.5 0.85714286 0.92857143 0.64285714 0.78571429
|
|
0.78571429 0.57142857 0.5 0.57142857]
|
|
|
|
mean value: 0.6821428571428572
|
|
|
|
key: train_roc_auc
|
|
value: [0.94469246 0.61111111 0.796875 0.9140625 0.921875 0.9375
|
|
0.9296875 0.671875 0.703125 0.953125 ]
|
|
|
|
mean value: 0.8383928571428572
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0. 0.71428571 0.85714286 0.44444444 0.66666667
|
|
0.57142857 0.25 0.46153846 0.33333333]
|
|
|
|
mean value: 0.4844294594294594
|
|
|
|
key: train_jcc
|
|
value: [0.89855072 0.22222222 0.59375 0.83076923 0.84615385 0.88405797
|
|
0.86764706 0.34375 0.62745098 0.90909091]
|
|
|
|
mean value: 0.7023442943104068
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.11391807 0.09409237 0.09487605 0.09567189 0.09379506 0.09356427
|
|
0.09453201 0.09791088 0.09554052 0.09554005]
|
|
|
|
mean value: 0.09694411754608154
|
|
|
|
key: score_time
|
|
value: [0.01464009 0.0145905 0.01488519 0.01464534 0.01463294 0.01461124
|
|
0.01467419 0.0150106 0.01514769 0.01581359]
|
|
|
|
mean value: 0.01486513614654541
|
|
|
|
key: test_mcc
|
|
value: [0.66143783 0.87287156 1. 1. 0.74535599 0.8660254
|
|
0.52223297 0.28867513 0.8660254 0.57735027]
|
|
|
|
mean value: 0.7399974560430457
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.93333333 1. 1. 0.85714286 0.92857143
|
|
0.71428571 0.64285714 0.92857143 0.78571429]
|
|
|
|
mean value: 0.8590476190476191
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.94117647 1. 1. 0.83333333 0.92307692
|
|
0.6 0.66666667 0.93333333 0.76923077]
|
|
|
|
mean value: 0.8490346907993966
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.7 0.88888889 1. 1. 1. 1.
|
|
1. 0.625 0.875 0.83333333]
|
|
|
|
mean value: 0.8922222222222222
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.71428571 0.85714286
|
|
0.42857143 0.71428571 1. 0.71428571]
|
|
|
|
mean value: 0.8428571428571429
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.92857143 1. 1. 0.85714286 0.92857143
|
|
0.71428571 0.64285714 0.92857143 0.78571429]
|
|
|
|
mean value: 0.8598214285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.88888889 1. 1. 0.71428571 0.85714286
|
|
0.42857143 0.5 0.875 0.625 ]
|
|
|
|
mean value: 0.7588888888888888
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.0
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03230977 0.03465533 0.05089736 0.04256916 0.04040456 0.05487514
|
|
0.04169941 0.04757524 0.03255701 0.04550004]
|
|
|
|
mean value: 0.04230430126190186
|
|
|
|
key: score_time
|
|
value: [0.01723957 0.02700257 0.02792835 0.0355022 0.02915096 0.02598453
|
|
0.03048086 0.01950192 0.01615834 0.01654243]
|
|
|
|
mean value: 0.024549174308776855
|
|
|
|
key: test_mcc
|
|
value: [0.76376262 0.87287156 1. 1. 0.57735027 0.8660254
|
|
0.74535599 0.1490712 0.8660254 0.57735027]
|
|
|
|
mean value: 0.7417812713717987
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.98449518 0.95324137 0.95324137 0.96922337
|
|
0.98449518 0.96922337 0.96922337 1. ]
|
|
|
|
mean value: 0.9783143216676922
|
|
|
|
key: test_accuracy
|
|
value: [0.86666667 0.93333333 1. 1. 0.78571429 0.92857143
|
|
0.85714286 0.57142857 0.92857143 0.78571429]
|
|
|
|
mean value: 0.8657142857142857
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.9921875 0.9765625 0.9765625 0.984375 0.9921875
|
|
0.984375 0.984375 1. ]
|
|
|
|
mean value: 0.9890625
|
|
|
|
key: test_fscore
|
|
value: [0.875 0.94117647 1. 1. 0.76923077 0.93333333
|
|
0.83333333 0.625 0.93333333 0.76923077]
|
|
|
|
mean value: 0.8679638009049774
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.99212598 0.97637795 0.97674419 0.98412698
|
|
0.99224806 0.98412698 0.98412698 1. ]
|
|
|
|
mean value: 0.9889877137450842
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.88888889 1. 1. 0.83333333 0.875
|
|
1. 0.55555556 0.875 0.83333333]
|
|
|
|
mean value: 0.8638888888888889
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 0.98412698 0.96923077 1.
|
|
0.98461538 1. 1. 1. ]
|
|
|
|
mean value: 0.9937973137973138
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.71428571 1.
|
|
0.71428571 0.71428571 1. 0.71428571]
|
|
|
|
mean value: 0.8857142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.984375 0.96875 0.984375 0.96875 1. 0.96875
|
|
0.96875 1. ]
|
|
|
|
mean value: 0.984375
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.92857143 1. 1. 0.78571429 0.92857143
|
|
0.85714286 0.57142857 0.92857143 0.78571429]
|
|
|
|
mean value: 0.8660714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.9921875 0.9765625 0.9765625 0.984375 0.9921875
|
|
0.984375 0.984375 1. ]
|
|
|
|
mean value: 0.9890625
|
|
|
|
key: test_jcc
|
|
value: [0.77777778 0.88888889 1. 1. 0.625 0.875
|
|
0.71428571 0.45454545 0.875 0.625 ]
|
|
|
|
mean value: 0.7835497835497836
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.984375 0.95384615 0.95454545 0.96875
|
|
0.98461538 0.96875 0.96875 1. ]
|
|
|
|
mean value: 0.9783631993006994
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03254342 0.05661488 0.04824209 0.05007434 0.06980419 0.04639006
|
|
0.03822303 0.04851413 0.0455544 0.03839159]
|
|
|
|
mean value: 0.047435212135314944
|
|
|
|
key: score_time
|
|
value: [0.02400923 0.0254271 0.02423596 0.02527332 0.02484155 0.02546525
|
|
0.02542686 0.02713227 0.02601194 0.02398038]
|
|
|
|
mean value: 0.02518038749694824
|
|
|
|
key: test_mcc
|
|
value: [ 0.32732684 -0.19642857 0.4472136 0.63245553 0.28867513 0.1490712
|
|
0.28867513 0.14285714 -0.4472136 0. ]
|
|
|
|
mean value: 0.16326324065058476
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.4 0.71428571 0.78571429 0.64285714 0.57142857
|
|
0.64285714 0.57142857 0.28571429 0.5 ]
|
|
|
|
mean value: 0.5780952380952381
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.61538462 0.4 0.66666667 0.72727273 0.66666667 0.5
|
|
0.61538462 0.57142857 0.16666667 0.53333333]
|
|
|
|
mean value: 0.5462803862803862
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.42857143 0.8 1. 0.625 0.6
|
|
0.66666667 0.57142857 0.2 0.5 ]
|
|
|
|
mean value: 0.6058333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.57142857 0.375 0.57142857 0.57142857 0.71428571 0.42857143
|
|
0.57142857 0.57142857 0.14285714 0.57142857]
|
|
|
|
mean value: 0.5089285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.66071429 0.40178571 0.71428571 0.78571429 0.64285714 0.57142857
|
|
0.64285714 0.57142857 0.28571429 0.5 ]
|
|
|
|
mean value: 0.5776785714285714
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.44444444 0.25 0.5 0.57142857 0.5 0.33333333
|
|
0.44444444 0.4 0.09090909 0.36363636]
|
|
|
|
mean value: 0.3898196248196248
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.27084661 0.25251555 0.25915909 0.25225282 0.25605011 0.25171423
|
|
0.25446415 0.25518131 0.25559974 0.24836516]
|
|
|
|
mean value: 0.25561487674713135
|
|
|
|
key: score_time
|
|
value: [0.00921893 0.00908136 0.00908971 0.0089159 0.00926518 0.00900149
|
|
0.00906825 0.00937915 0.00925112 0.00913453]
|
|
|
|
mean value: 0.009140563011169434
|
|
|
|
key: test_mcc
|
|
value: [0.66143783 0.87287156 1. 1. 0.71428571 0.8660254
|
|
0.74535599 0.31622777 0.8660254 0.42857143]
|
|
|
|
mean value: 0.7470801097652905
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.93333333 1. 1. 0.85714286 0.92857143
|
|
0.85714286 0.64285714 0.92857143 0.71428571]
|
|
|
|
mean value: 0.8661904761904762
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.94117647 1. 1. 0.85714286 0.93333333
|
|
0.83333333 0.70588235 0.93333333 0.71428571]
|
|
|
|
mean value: 0.8742016806722689
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.7 0.88888889 1. 1. 0.85714286 0.875
|
|
1. 0.6 0.875 0.71428571]
|
|
|
|
mean value: 0.851031746031746
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.85714286 1.
|
|
0.71428571 0.85714286 1. 0.71428571]
|
|
|
|
mean value: 0.9142857142857143
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.92857143 1. 1. 0.85714286 0.92857143
|
|
0.85714286 0.64285714 0.92857143 0.71428571]
|
|
|
|
mean value: 0.8669642857142857
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.88888889 1. 1. 0.75 0.875
|
|
0.71428571 0.54545455 0.875 0.55555556]
|
|
|
|
mean value: 0.7904184704184705
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01737309 0.016608 0.01730108 0.01654887 0.04343247 0.01705122
|
|
0.02059937 0.01725125 0.01745582 0.01699615]
|
|
|
|
mean value: 0.020061731338500977
|
|
|
|
key: score_time
|
|
value: [0.01210737 0.0118475 0.01199269 0.01186085 0.01219201 0.01192856
|
|
0.01498175 0.01467967 0.01503849 0.01461554]
|
|
|
|
mean value: 0.013124442100524903
|
|
|
|
key: test_mcc
|
|
value: [ 0.05455447 0.20044593 0.14285714 -0.40824829 -0.14285714 -0.14285714
|
|
-0.28867513 -0.14285714 -0.1490712 0. ]
|
|
|
|
mean value: -0.08767085052796311
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.53333333 0.6 0.57142857 0.35714286 0.42857143 0.42857143
|
|
0.35714286 0.42857143 0.42857143 0.5 ]
|
|
|
|
mean value: 0.4633333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.46153846 0.7 0.57142857 0.52631579 0.42857143 0.42857143
|
|
0.30769231 0.42857143 0.5 0.46153846]
|
|
|
|
mean value: 0.4814227877385772
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 0.58333333 0.57142857 0.41666667 0.42857143 0.42857143
|
|
0.33333333 0.42857143 0.44444444 0.5 ]
|
|
|
|
mean value: 0.4634920634920635
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.42857143 0.875 0.57142857 0.71428571 0.42857143 0.42857143
|
|
0.28571429 0.42857143 0.57142857 0.42857143]
|
|
|
|
mean value: 0.5160714285714285
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.52678571 0.58035714 0.57142857 0.35714286 0.42857143 0.42857143
|
|
0.35714286 0.42857143 0.42857143 0.5 ]
|
|
|
|
mean value: 0.4607142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.3 0.53846154 0.4 0.35714286 0.27272727 0.27272727
|
|
0.18181818 0.27272727 0.33333333 0.3 ]
|
|
|
|
mean value: 0.32289377289377286
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.03
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03756571 0.01294708 0.01308179 0.01303172 0.01298833 0.01315022
|
|
0.01306343 0.012995 0.01294088 0.02156067]
|
|
|
|
mean value: 0.016332483291625975
|
|
|
|
key: score_time
|
|
value: [0.0116086 0.01149893 0.01147532 0.01145434 0.01147461 0.01149917
|
|
0.01148558 0.0114975 0.0114572 0.01152682]
|
|
|
|
mean value: 0.011497807502746583
|
|
|
|
key: test_mcc
|
|
value: [0.21821789 0.26189246 0.71428571 0.74535599 0.74535599 0.8660254
|
|
0.8660254 0.4472136 0.28867513 0.4472136 ]
|
|
|
|
mean value: 0.5600261185993421
|
|
|
|
key: train_mcc
|
|
value: [0.93748452 0.93748452 0.92288947 0.89073374 0.89073374 0.90625
|
|
0.85947992 0.95324137 0.95417386 0.9379581 ]
|
|
|
|
mean value: 0.9190429255191599
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.6 0.85714286 0.85714286 0.85714286 0.92857143
|
|
0.92857143 0.71428571 0.64285714 0.71428571]
|
|
|
|
mean value: 0.77
|
|
|
|
key: train_accuracy
|
|
value: [0.96850394 0.96850394 0.9609375 0.9453125 0.9453125 0.953125
|
|
0.9296875 0.9765625 0.9765625 0.96875 ]
|
|
|
|
mean value: 0.9593257874015748
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.5 0.85714286 0.83333333 0.83333333 0.92307692
|
|
0.92307692 0.75 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7578296703296703
|
|
|
|
key: train_fscore
|
|
value: [0.96825397 0.96875 0.96183206 0.94573643 0.94573643 0.953125
|
|
0.92913386 0.97674419 0.97709924 0.96923077]
|
|
|
|
mean value: 0.9595641947725944
|
|
|
|
key: test_precision
|
|
value: [0.55555556 0.75 0.85714286 1. 1. 1.
|
|
1. 0.66666667 0.625 0.8 ]
|
|
|
|
mean value: 0.8254365079365079
|
|
|
|
key: train_precision
|
|
value: [0.98387097 0.95384615 0.94029851 0.93846154 0.93846154 0.953125
|
|
0.93650794 0.96923077 0.95522388 0.95454545]
|
|
|
|
mean value: 0.9523571746855028
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.375 0.85714286 0.71428571 0.71428571 0.85714286
|
|
0.85714286 0.85714286 0.71428571 0.57142857]
|
|
|
|
mean value: 0.7232142857142857
|
|
|
|
key: train_recall
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_orig.py:175: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_orig.py:178: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.953125 0.98412698 0.984375 0.953125 0.953125 0.953125
|
|
0.921875 0.984375 1. 0.984375 ]
|
|
|
|
mean value: 0.9671626984126984
|
|
|
|
key: test_roc_auc
|
|
value: [0.60714286 0.61607143 0.85714286 0.85714286 0.85714286 0.92857143
|
|
0.92857143 0.71428571 0.64285714 0.71428571]
|
|
|
|
mean value: 0.7723214285714286
|
|
|
|
key: train_roc_auc
|
|
value: [0.96862599 0.96862599 0.9609375 0.9453125 0.9453125 0.953125
|
|
0.9296875 0.9765625 0.9765625 0.96875 ]
|
|
|
|
mean value: 0.9593501984126984
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.33333333 0.75 0.71428571 0.71428571 0.85714286
|
|
0.85714286 0.6 0.5 0.5 ]
|
|
|
|
mean value: 0.628073593073593
|
|
|
|
key: train_jcc
|
|
value: [0.93846154 0.93939394 0.92647059 0.89705882 0.89705882 0.91044776
|
|
0.86764706 0.95454545 0.95522388 0.94029851]
|
|
|
|
mean value: 0.922660637577231
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.17722702 0.11350346 0.21813035 0.2076714 0.21254301 0.24001646
|
|
0.32121396 0.21115351 0.19236588 0.18778825]
|
|
|
|
mean value: 0.2081613302230835
|
|
|
|
key: score_time
|
|
value: [0.0201118 0.01175857 0.01412797 0.02275658 0.02106333 0.02480865
|
|
0.02300787 0.02113318 0.0210979 0.0177002 ]
|
|
|
|
mean value: 0.019756603240966796
|
|
|
|
key: test_mcc
|
|
value: [0.21821789 0.26189246 0.71428571 0.74535599 0.74535599 0.8660254
|
|
0.8660254 0.4472136 0.28867513 0.4472136 ]
|
|
|
|
mean value: 0.5600261185993421
|
|
|
|
key: train_mcc
|
|
value: [0.93748452 0.93748452 0.92288947 0.89073374 0.89073374 0.90625
|
|
0.85947992 0.95324137 0.95417386 0.9379581 ]
|
|
|
|
mean value: 0.9190429255191599
|
|
|
|
key: test_accuracy
|
|
value: [0.6 0.6 0.85714286 0.85714286 0.85714286 0.92857143
|
|
0.92857143 0.71428571 0.64285714 0.71428571]
|
|
|
|
mean value: 0.77
|
|
|
|
key: train_accuracy
|
|
value: [0.96850394 0.96850394 0.9609375 0.9453125 0.9453125 0.953125
|
|
0.9296875 0.9765625 0.9765625 0.96875 ]
|
|
|
|
mean value: 0.9593257874015748
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.5 0.85714286 0.83333333 0.83333333 0.92307692
|
|
0.92307692 0.75 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7578296703296703
|
|
|
|
key: train_fscore
|
|
value: [0.96825397 0.96875 0.96183206 0.94573643 0.94573643 0.953125
|
|
0.92913386 0.97674419 0.97709924 0.96923077]
|
|
|
|
mean value: 0.9595641947725944
|
|
|
|
key: test_precision
|
|
value: [0.55555556 0.75 0.85714286 1. 1. 1.
|
|
1. 0.66666667 0.625 0.8 ]
|
|
|
|
mean value: 0.8254365079365079
|
|
|
|
key: train_precision
|
|
value: [0.98387097 0.95384615 0.94029851 0.93846154 0.93846154 0.953125
|
|
0.93650794 0.96923077 0.95522388 0.95454545]
|
|
|
|
mean value: 0.9523571746855028
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.375 0.85714286 0.71428571 0.71428571 0.85714286
|
|
0.85714286 0.85714286 0.71428571 0.57142857]
|
|
|
|
mean value: 0.7232142857142857
|
|
|
|
key: train_recall
|
|
value: [0.953125 0.98412698 0.984375 0.953125 0.953125 0.953125
|
|
0.921875 0.984375 1. 0.984375 ]
|
|
|
|
mean value: 0.9671626984126984
|
|
|
|
key: test_roc_auc
|
|
value: [0.60714286 0.61607143 0.85714286 0.85714286 0.85714286 0.92857143
|
|
0.92857143 0.71428571 0.64285714 0.71428571]
|
|
|
|
mean value: 0.7723214285714286
|
|
|
|
key: train_roc_auc
|
|
value: [0.96862599 0.96862599 0.9609375 0.9453125 0.9453125 0.953125
|
|
0.9296875 0.9765625 0.9765625 0.96875 ]
|
|
|
|
mean value: 0.9593501984126984
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.33333333 0.75 0.71428571 0.71428571 0.85714286
|
|
0.85714286 0.6 0.5 0.5 ]
|
|
|
|
mean value: 0.628073593073593
|
|
|
|
key: train_jcc
|
|
value: [0.93846154 0.93939394 0.92647059 0.89705882 0.89705882 0.91044776
|
|
0.86764706 0.95454545 0.95522388 0.94029851]
|
|
|
|
mean value: 0.922660637577231
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03295398 0.03293633 0.04036927 0.07436967 0.05748558 0.04356003
|
|
0.07211637 0.03021264 0.03299451 0.03802323]
|
|
|
|
mean value: 0.04550216197967529
|
|
|
|
key: score_time
|
|
value: [0.01606202 0.01162028 0.0115943 0.01190186 0.01204824 0.02361083
|
|
0.02349329 0.01182771 0.01200891 0.01200128]
|
|
|
|
mean value: 0.014616870880126953
|
|
|
|
key: test_mcc
|
|
value: [0.39393939 0.66414149 0.65909298 0.48075018 0.74242424 0.74047959
|
|
0.74047959 0.74047959 0.56694671 0.48795004]
|
|
|
|
mean value: 0.6216683798300241
|
|
|
|
key: train_mcc
|
|
value: [0.80500813 0.85463818 0.89371934 0.86356283 0.84407425 0.86358877
|
|
0.86493273 0.88292404 0.85473156 0.87481777]
|
|
|
|
mean value: 0.8601997596512527
|
|
|
|
key: test_accuracy
|
|
value: [0.69565217 0.82608696 0.82608696 0.73913043 0.86956522 0.86956522
|
|
0.86956522 0.86956522 0.77272727 0.72727273]
|
|
|
|
mean value: 0.8065217391304348
|
|
|
|
key: train_accuracy
|
|
value: [0.90243902 0.92682927 0.94634146 0.93170732 0.92195122 0.93170732
|
|
0.93170732 0.94146341 0.92718447 0.9368932 ]
|
|
|
|
mean value: 0.9298224011366327
|
|
|
|
key: test_fscore
|
|
value: [0.69565217 0.83333333 0.8 0.7 0.86956522 0.88
|
|
0.88 0.88 0.8 0.66666667]
|
|
|
|
mean value: 0.8005217391304347
|
|
|
|
key: train_fscore
|
|
value: [0.90384615 0.92890995 0.9478673 0.93269231 0.9223301 0.93203883
|
|
0.93333333 0.94117647 0.92822967 0.93838863]
|
|
|
|
mean value: 0.9308812739347887
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.76923077 0.88888889 0.77777778 0.90909091 0.84615385
|
|
0.84615385 0.84615385 0.71428571 0.85714286]
|
|
|
|
mean value: 0.8121545121545122
|
|
|
|
key: train_precision
|
|
value: [0.8952381 0.90740741 0.92592593 0.92380952 0.91346154 0.92307692
|
|
0.90740741 0.94117647 0.91509434 0.91666667]
|
|
|
|
mean value: 0.9169264298204365
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.90909091 0.72727273 0.63636364 0.83333333 0.91666667
|
|
0.91666667 0.91666667 0.90909091 0.54545455]
|
|
|
|
mean value: 0.8037878787878787
|
|
|
|
key: train_recall
|
|
value: [0.91262136 0.95145631 0.97087379 0.94174757 0.93137255 0.94117647
|
|
0.96078431 0.94117647 0.94174757 0.96116505]
|
|
|
|
mean value: 0.9454121454407005
|
|
|
|
key: test_roc_auc
|
|
value: [0.6969697 0.82954545 0.8219697 0.73484848 0.87121212 0.86742424
|
|
0.86742424 0.86742424 0.77272727 0.72727273]
|
|
|
|
mean value: 0.8056818181818182
|
|
|
|
key: train_roc_auc
|
|
value: [0.90238911 0.92670855 0.94622121 0.9316581 0.92199695 0.93175328
|
|
0.93184847 0.94146202 0.92718447 0.9368932 ]
|
|
|
|
mean value: 0.9298115362649915
|
|
|
|
key: test_jcc
|
|
value: [0.53333333 0.71428571 0.66666667 0.53846154 0.76923077 0.78571429
|
|
0.78571429 0.78571429 0.66666667 0.5 ]
|
|
|
|
mean value: 0.6745787545787546
|
|
|
|
key: train_jcc
|
|
value: [0.8245614 0.86725664 0.9009009 0.87387387 0.85585586 0.87272727
|
|
0.875 0.88888889 0.86607143 0.88392857]
|
|
|
|
mean value: 0.8709064832923705
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.93171263 0.76386046 0.90062022 0.77434134 0.75542283 0.82477236
|
|
0.75631881 0.79757857 0.92382717 0.79171753]
|
|
|
|
mean value: 0.8220171928405762
|
|
|
|
key: score_time
|
|
value: [0.01185131 0.0120995 0.02216887 0.01233625 0.01512313 0.01532745
|
|
0.01551938 0.01230907 0.01238847 0.01561093]
|
|
|
|
mean value: 0.014473438262939453
|
|
|
|
key: test_mcc
|
|
value: [0.82575758 0.74047959 0.76277007 0.56818182 0.76764947 0.82575758
|
|
0.74242424 0.91666667 0.54772256 0.75592895]
|
|
|
|
mean value: 0.7453338517356568
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 0.91369855 1. ]
|
|
|
|
mean value: 0.9913698554847693
|
|
|
|
key: test_accuracy
|
|
value: [0.91304348 0.86956522 0.86956522 0.7826087 0.86956522 0.91304348
|
|
0.86956522 0.95652174 0.77272727 0.86363636]
|
|
|
|
mean value: 0.8679841897233201
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 0.95631068 1. ]
|
|
|
|
mean value: 0.9956310679611651
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.85714286 0.84210526 0.7826087 0.85714286 0.91666667
|
|
0.86956522 0.95652174 0.7826087 0.84210526]
|
|
|
|
mean value: 0.8615558164185166
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 0.95734597 1. ]
|
|
|
|
mean value: 0.9957345971563981
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.9 1. 0.75 1. 0.91666667
|
|
0.90909091 1. 0.75 1. ]
|
|
|
|
mean value: 0.9134848484848485
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 0.93518519 1. ]
|
|
|
|
mean value: 0.9935185185185185
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.81818182 0.72727273 0.81818182 0.75 0.91666667
|
|
0.83333333 0.91666667 0.81818182 0.72727273]
|
|
|
|
mean value: 0.8234848484848485
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 0.98058252 1. ]
|
|
|
|
mean value: 0.9980582524271845
|
|
|
|
key: test_roc_auc
|
|
value: [0.91287879 0.86742424 0.86363636 0.78409091 0.875 0.91287879
|
|
0.87121212 0.95833333 0.77272727 0.86363636]
|
|
|
|
mean value: 0.8681818181818182
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 0.95631068 1. ]
|
|
|
|
mean value: 0.9956310679611651
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.75 0.72727273 0.64285714 0.75 0.84615385
|
|
0.76923077 0.91666667 0.64285714 0.72727273]
|
|
|
|
mean value: 0.7605644355644355
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 0.91818182 1. ]
|
|
|
|
mean value: 0.9918181818181818
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0132544 0.01033616 0.00911188 0.009161 0.00958657 0.00882697
|
|
0.00929618 0.00892305 0.0093689 0.00876236]
|
|
|
|
mean value: 0.009662747383117676
|
|
|
|
key: score_time
|
|
value: [0.01657176 0.00902963 0.00913548 0.00981712 0.00965953 0.00855589
|
|
0.0086391 0.00849652 0.00863767 0.008816 ]
|
|
|
|
mean value: 0.009735870361328124
|
|
|
|
key: test_mcc
|
|
value: [0.11236664 0.56490196 0.65151515 0.06579517 0.22407133 0.50168817
|
|
0.58002308 0.42228828 0.48795004 0.09759001]
|
|
|
|
mean value: 0.37081898188601464
|
|
|
|
key: train_mcc
|
|
value: [0.41031528 0.49366174 0.51698955 0.40881923 0.40551208 0.45203295
|
|
0.49026396 0.44322953 0.45669396 0.43639645]
|
|
|
|
mean value: 0.45139147236435284
|
|
|
|
key: test_accuracy
|
|
value: [0.52173913 0.7826087 0.82608696 0.52173913 0.60869565 0.73913043
|
|
0.7826087 0.69565217 0.72727273 0.54545455]
|
|
|
|
mean value: 0.675098814229249
|
|
|
|
key: train_accuracy
|
|
value: [0.68292683 0.74634146 0.74146341 0.67317073 0.68780488 0.71707317
|
|
0.73170732 0.71219512 0.7184466 0.70873786]
|
|
|
|
mean value: 0.7119867392848686
|
|
|
|
key: test_fscore
|
|
value: [0.64516129 0.76190476 0.81818182 0.59259259 0.68965517 0.78571429
|
|
0.81481481 0.75862069 0.76923077 0.61538462]
|
|
|
|
mean value: 0.7251260810215204
|
|
|
|
key: train_fscore
|
|
value: [0.743083 0.74 0.781893 0.74329502 0.73553719 0.75
|
|
0.76793249 0.74678112 0.75423729 0.74576271]
|
|
|
|
mean value: 0.7508521822638834
|
|
|
|
key: test_precision
|
|
value: [0.5 0.8 0.81818182 0.5 0.58823529 0.6875
|
|
0.73333333 0.64705882 0.66666667 0.53333333]
|
|
|
|
mean value: 0.6474309269162211
|
|
|
|
key: train_precision
|
|
value: [0.62666667 0.7628866 0.67857143 0.61392405 0.63571429 0.66923077
|
|
0.67407407 0.66412214 0.66917293 0.66165414]
|
|
|
|
mean value: 0.6656017077902033
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.72727273 0.81818182 0.72727273 0.83333333 0.91666667
|
|
0.91666667 0.91666667 0.90909091 0.72727273]
|
|
|
|
mean value: 0.8401515151515151
|
|
|
|
key: train_recall
|
|
value: [0.91262136 0.7184466 0.9223301 0.94174757 0.87254902 0.85294118
|
|
0.89215686 0.85294118 0.86407767 0.85436893]
|
|
|
|
mean value: 0.8684180468303826
|
|
|
|
key: test_roc_auc
|
|
value: [0.53787879 0.78030303 0.82575758 0.53030303 0.59848485 0.73106061
|
|
0.77651515 0.68560606 0.72727273 0.54545455]
|
|
|
|
mean value: 0.6738636363636363
|
|
|
|
key: train_roc_auc
|
|
value: [0.68180088 0.7464782 0.74057681 0.67185418 0.68870169 0.71773272
|
|
0.7324862 0.71287836 0.7184466 0.70873786]
|
|
|
|
mean value: 0.711969350847135
|
|
|
|
key: test_jcc
|
|
value: [0.47619048 0.61538462 0.69230769 0.42105263 0.52631579 0.64705882
|
|
0.6875 0.61111111 0.625 0.44444444]
|
|
|
|
mean value: 0.5746365584020383
|
|
|
|
key: train_jcc
|
|
value: [0.59119497 0.58730159 0.64189189 0.59146341 0.58169935 0.6
|
|
0.62328767 0.59589041 0.60544218 0.59459459]
|
|
|
|
mean value: 0.6012766062443438
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01010966 0.00937247 0.0098629 0.00893044 0.00894618 0.0098803
|
|
0.00925088 0.00918436 0.00915742 0.00916314]
|
|
|
|
mean value: 0.009385776519775391
|
|
|
|
key: score_time
|
|
value: [0.00906849 0.00885463 0.00871778 0.00924611 0.00887156 0.00888371
|
|
0.00867105 0.00927925 0.00878453 0.00857878]
|
|
|
|
mean value: 0.008895587921142579
|
|
|
|
key: test_mcc
|
|
value: [0.21969697 0.55048188 0.22407133 0.21452908 0.3030303 0.3030303
|
|
0.33371191 0.39393939 0.09090909 0.32539569]
|
|
|
|
mean value: 0.29587959510446155
|
|
|
|
key: train_mcc
|
|
value: [0.44146616 0.44911432 0.45709726 0.49637007 0.4861007 0.48652841
|
|
0.43786483 0.44832571 0.49218702 0.50892419]
|
|
|
|
mean value: 0.4703978666309494
|
|
|
|
key: test_accuracy
|
|
value: [0.60869565 0.73913043 0.60869565 0.60869565 0.65217391 0.65217391
|
|
0.65217391 0.69565217 0.54545455 0.63636364]
|
|
|
|
mean value: 0.6399209486166008
|
|
|
|
key: train_accuracy
|
|
value: [0.71707317 0.72195122 0.72682927 0.74634146 0.73658537 0.74146341
|
|
0.71707317 0.72195122 0.74271845 0.75242718]
|
|
|
|
mean value: 0.7324413923750888
|
|
|
|
key: test_fscore
|
|
value: [0.60869565 0.625 0.47058824 0.52631579 0.66666667 0.66666667
|
|
0.6 0.69565217 0.54545455 0.5 ]
|
|
|
|
mean value: 0.5905039729642637
|
|
|
|
key: train_fscore
|
|
value: [0.69148936 0.70157068 0.71134021 0.73195876 0.7 0.72251309
|
|
0.69473684 0.6984127 0.71957672 0.7357513 ]
|
|
|
|
mean value: 0.7107349655839269
|
|
|
|
key: test_precision
|
|
value: [0.58333333 1. 0.66666667 0.625 0.66666667 0.66666667
|
|
0.75 0.72727273 0.54545455 0.8 ]
|
|
|
|
mean value: 0.7031060606060606
|
|
|
|
key: train_precision
|
|
value: [0.76470588 0.76136364 0.75824176 0.78021978 0.80769231 0.7752809
|
|
0.75 0.75862069 0.79069767 0.78888889]
|
|
|
|
mean value: 0.7735711516709494
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.45454545 0.36363636 0.45454545 0.66666667 0.66666667
|
|
0.5 0.66666667 0.54545455 0.36363636]
|
|
|
|
mean value: 0.5318181818181817
|
|
|
|
key: train_recall
|
|
value: [0.63106796 0.65048544 0.66990291 0.68932039 0.61764706 0.67647059
|
|
0.64705882 0.64705882 0.66019417 0.68932039]
|
|
|
|
mean value: 0.657852655625357
|
|
|
|
key: test_roc_auc
|
|
value: [0.60984848 0.72727273 0.59848485 0.60227273 0.65151515 0.65151515
|
|
0.65909091 0.6969697 0.54545455 0.63636364]
|
|
|
|
mean value: 0.6378787878787878
|
|
|
|
key: train_roc_auc
|
|
value: [0.71749476 0.72230154 0.72710832 0.74662098 0.736008 0.74114792
|
|
0.7167333 0.72158766 0.74271845 0.75242718]
|
|
|
|
mean value: 0.732414810584428
|
|
|
|
key: test_jcc
|
|
value: [0.4375 0.45454545 0.30769231 0.35714286 0.5 0.5
|
|
0.42857143 0.53333333 0.375 0.33333333]
|
|
|
|
mean value: 0.42271187146187145
|
|
|
|
key: train_jcc
|
|
value: [0.52845528 0.54032258 0.552 0.57723577 0.53846154 0.56557377
|
|
0.53225806 0.53658537 0.56198347 0.58196721]
|
|
|
|
mean value: 0.5514843061067994
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.0090549 0.00948048 0.00953913 0.00963783 0.00954986 0.00989366
|
|
0.00965118 0.00976753 0.00973725 0.00958776]
|
|
|
|
mean value: 0.009589958190917968
|
|
|
|
key: score_time
|
|
value: [0.01488662 0.01073503 0.01059341 0.01062608 0.01254487 0.01061916
|
|
0.01115012 0.01079106 0.01064491 0.01087499]
|
|
|
|
mean value: 0.01134662628173828
|
|
|
|
key: test_mcc
|
|
value: [ 0.04545455 0.03178209 -0.06579517 0.30240737 0.15096491 -0.31298622
|
|
0.13740858 0.31252706 0.18898224 -0.09245003]
|
|
|
|
mean value: 0.06982953647576104
|
|
|
|
key: train_mcc
|
|
value: [0.58048549 0.45409531 0.47798272 0.45409531 0.51440766 0.52244835
|
|
0.48780456 0.43416169 0.52548679 0.56526885]
|
|
|
|
mean value: 0.5016236722758309
|
|
|
|
key: test_accuracy
|
|
value: [0.52173913 0.52173913 0.47826087 0.65217391 0.56521739 0.34782609
|
|
0.56521739 0.65217391 0.59090909 0.45454545]
|
|
|
|
mean value: 0.5349802371541502
|
|
|
|
key: train_accuracy
|
|
value: [0.7902439 0.72682927 0.73658537 0.72682927 0.75609756 0.76097561
|
|
0.74146341 0.71707317 0.76213592 0.7815534 ]
|
|
|
|
mean value: 0.7499786881363959
|
|
|
|
key: test_fscore
|
|
value: [0.52173913 0.42105263 0.33333333 0.6 0.5 0.4
|
|
0.54545455 0.71428571 0.52631579 0.4 ]
|
|
|
|
mean value: 0.4962181144561007
|
|
|
|
key: train_fscore
|
|
value: [0.79227053 0.72277228 0.71875 0.72277228 0.74226804 0.75376884
|
|
0.71957672 0.71287129 0.75376884 0.7715736 ]
|
|
|
|
mean value: 0.7410392426302083
|
|
|
|
key: test_precision
|
|
value: [0.5 0.5 0.42857143 0.66666667 0.625 0.38461538
|
|
0.6 0.625 0.625 0.44444444]
|
|
|
|
mean value: 0.5399297924297924
|
|
|
|
key: train_precision
|
|
value: [0.78846154 0.73737374 0.7752809 0.73737374 0.7826087 0.77319588
|
|
0.7816092 0.72 0.78125 0.80851064]
|
|
|
|
mean value: 0.7685664317726423
|
|
|
|
key: test_recall
|
|
value: [0.54545455 0.36363636 0.27272727 0.54545455 0.41666667 0.41666667
|
|
0.5 0.83333333 0.45454545 0.36363636]
|
|
|
|
mean value: 0.4712121212121212
|
|
|
|
key: train_recall
|
|
value: [0.7961165 0.70873786 0.66990291 0.70873786 0.70588235 0.73529412
|
|
0.66666667 0.70588235 0.72815534 0.73786408]
|
|
|
|
mean value: 0.7163240053302875
|
|
|
|
key: test_roc_auc
|
|
value: [0.52272727 0.51515152 0.46969697 0.64772727 0.5719697 0.34469697
|
|
0.56818182 0.64393939 0.59090909 0.45454545]
|
|
|
|
mean value: 0.5329545454545455
|
|
|
|
key: train_roc_auc
|
|
value: [0.79021512 0.72691795 0.73691224 0.72691795 0.7558538 0.76085094
|
|
0.74110032 0.71701885 0.76213592 0.7815534 ]
|
|
|
|
mean value: 0.7499476489624977
|
|
|
|
key: test_jcc
|
|
value: [0.35294118 0.26666667 0.2 0.42857143 0.33333333 0.25
|
|
0.375 0.55555556 0.35714286 0.25 ]
|
|
|
|
mean value: 0.33692110177404294
|
|
|
|
key: train_jcc
|
|
value: [0.656 0.56589147 0.56097561 0.56589147 0.59016393 0.60483871
|
|
0.56198347 0.55384615 0.60483871 0.62809917]
|
|
|
|
mean value: 0.5892528707747853
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01453853 0.01178885 0.01168919 0.01257873 0.01363349 0.01344967
|
|
0.01352501 0.01355839 0.0126698 0.01276493]
|
|
|
|
mean value: 0.013019657135009766
|
|
|
|
key: score_time
|
|
value: [0.01078439 0.00963712 0.00947428 0.01007676 0.01063275 0.01030612
|
|
0.01044083 0.01055479 0.01006365 0.00953841]
|
|
|
|
mean value: 0.010150909423828125
|
|
|
|
key: test_mcc
|
|
value: [0.31298622 0.74242424 0.50168817 0.12878788 0.66414149 0.30240737
|
|
0.38932432 0.65151515 0.27272727 0.27272727]
|
|
|
|
mean value: 0.42387293810042725
|
|
|
|
key: train_mcc
|
|
value: [0.72894414 0.76647632 0.80552394 0.73821604 0.76638754 0.78600013
|
|
0.77647587 0.7954287 0.71848046 0.738735 ]
|
|
|
|
mean value: 0.7620668135550838
|
|
|
|
key: test_accuracy
|
|
value: [0.65217391 0.86956522 0.73913043 0.56521739 0.82608696 0.65217391
|
|
0.69565217 0.82608696 0.63636364 0.63636364]
|
|
|
|
mean value: 0.7098814229249012
|
|
|
|
key: train_accuracy
|
|
value: [0.86341463 0.88292683 0.90243902 0.86829268 0.88292683 0.89268293
|
|
0.88780488 0.89756098 0.8592233 0.86893204]
|
|
|
|
mean value: 0.880620412029363
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.86956522 0.66666667 0.54545455 0.81818182 0.69230769
|
|
0.72 0.83333333 0.63636364 0.63636364]
|
|
|
|
mean value: 0.70849032127293
|
|
|
|
key: train_fscore
|
|
value: [0.86915888 0.88118812 0.9009901 0.87323944 0.88 0.89423077
|
|
0.88442211 0.89552239 0.85853659 0.87203791]
|
|
|
|
mean value: 0.8809326300847204
|
|
|
|
key: test_precision
|
|
value: [0.61538462 0.83333333 0.85714286 0.54545455 0.9 0.64285714
|
|
0.69230769 0.83333333 0.63636364 0.63636364]
|
|
|
|
mean value: 0.7192540792540792
|
|
|
|
key: train_precision
|
|
value: [0.83783784 0.8989899 0.91919192 0.84545455 0.89795918 0.87735849
|
|
0.90721649 0.90909091 0.8627451 0.85185185]
|
|
|
|
mean value: 0.8807696229541047
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.90909091 0.54545455 0.54545455 0.75 0.75
|
|
0.75 0.83333333 0.63636364 0.63636364]
|
|
|
|
mean value: 0.7083333333333334
|
|
|
|
key: train_recall
|
|
value: [0.90291262 0.86407767 0.88349515 0.90291262 0.8627451 0.91176471
|
|
0.8627451 0.88235294 0.85436893 0.89320388]
|
|
|
|
mean value: 0.8820578716923663
|
|
|
|
key: test_roc_auc
|
|
value: [0.65530303 0.87121212 0.73106061 0.56439394 0.82954545 0.64772727
|
|
0.69318182 0.82575758 0.63636364 0.63636364]
|
|
|
|
mean value: 0.709090909090909
|
|
|
|
key: train_roc_auc
|
|
value: [0.86322102 0.88301923 0.90253189 0.86812298 0.88282886 0.89277556
|
|
0.88768323 0.89748715 0.8592233 0.86893204]
|
|
|
|
mean value: 0.8805825242718447
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.76923077 0.5 0.375 0.69230769 0.52941176
|
|
0.5625 0.71428571 0.46666667 0.46666667]
|
|
|
|
mean value: 0.5576069273863391
|
|
|
|
key: train_jcc
|
|
value: [0.76859504 0.78761062 0.81981982 0.775 0.78571429 0.80869565
|
|
0.79279279 0.81081081 0.75213675 0.77310924]
|
|
|
|
mean value: 0.7874285017937194
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.32719898 0.17454052 0.83057833 0.54179835 0.57541323 0.82422447
|
|
0.67535782 0.35218549 0.46702242 1.07017779]
|
|
|
|
mean value: 0.5838497400283813
|
|
|
|
key: score_time
|
|
value: [0.01227832 0.01222968 0.01220989 0.01225519 0.01225281 0.01271915
|
|
0.01260519 0.0125823 0.01266718 0.0126586 ]
|
|
|
|
mean value: 0.012445831298828125
|
|
|
|
key: test_mcc
|
|
value: [0.44411739 0.12878788 0.58002308 0.30240737 0.56879646 0.56490196
|
|
0.65909298 0.50168817 0.13245324 0.46225016]
|
|
|
|
mean value: 0.43445186796951096
|
|
|
|
key: train_mcc
|
|
value: [0.52539178 0.50494514 0.92351163 0.73838965 0.65067908 0.85702512
|
|
0.79260855 0.58203168 0.58157543 0.93243443]
|
|
|
|
mean value: 0.7088592486777825
|
|
|
|
key: test_accuracy
|
|
value: [0.69565217 0.56521739 0.7826087 0.65217391 0.73913043 0.7826087
|
|
0.82608696 0.73913043 0.54545455 0.72727273]
|
|
|
|
mean value: 0.7055335968379447
|
|
|
|
key: train_accuracy
|
|
value: [0.74634146 0.74634146 0.96097561 0.86829268 0.80487805 0.92682927
|
|
0.88780488 0.7804878 0.75728155 0.96601942]
|
|
|
|
mean value: 0.8445252190385981
|
|
|
|
key: test_fscore
|
|
value: [0.74074074 0.54545455 0.73684211 0.6 0.66666667 0.8
|
|
0.84615385 0.78571429 0.28571429 0.7 ]
|
|
|
|
mean value: 0.6707286475707528
|
|
|
|
key: train_fscore
|
|
value: [0.78512397 0.7173913 0.96226415 0.86432161 0.76190476 0.92957746
|
|
0.89777778 0.80519481 0.6835443 0.96650718]
|
|
|
|
mean value: 0.8373607320770611
|
|
|
|
key: test_precision
|
|
value: [0.625 0.54545455 0.875 0.66666667 1. 0.76923077
|
|
0.78571429 0.6875 0.66666667 0.77777778]
|
|
|
|
mean value: 0.7399010711510712
|
|
|
|
key: train_precision
|
|
value: [0.68345324 0.81481481 0.93577982 0.89583333 0.96969697 0.89189189
|
|
0.82113821 0.72093023 0.98181818 0.95283019]
|
|
|
|
mean value: 0.8668186878098524
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.54545455 0.63636364 0.54545455 0.5 0.83333333
|
|
0.91666667 0.91666667 0.18181818 0.63636364]
|
|
|
|
mean value: 0.6621212121212121
|
|
|
|
key: train_recall
|
|
value: [0.9223301 0.6407767 0.99029126 0.83495146 0.62745098 0.97058824
|
|
0.99019608 0.91176471 0.52427184 0.98058252]
|
|
|
|
mean value: 0.8393203883495146
|
|
|
|
key: test_roc_auc
|
|
value: [0.70454545 0.56439394 0.77651515 0.64772727 0.75 0.78030303
|
|
0.8219697 0.73106061 0.54545455 0.72727273]
|
|
|
|
mean value: 0.7049242424242423
|
|
|
|
key: train_roc_auc
|
|
value: [0.74547877 0.74685894 0.96083191 0.86845612 0.80401675 0.92704169
|
|
0.88830192 0.78112507 0.75728155 0.96601942]
|
|
|
|
mean value: 0.84454121454407
|
|
|
|
key: test_jcc
|
|
value: [0.58823529 0.375 0.58333333 0.42857143 0.5 0.66666667
|
|
0.73333333 0.64705882 0.16666667 0.53846154]
|
|
|
|
mean value: 0.5227327084680026
|
|
|
|
key: train_jcc
|
|
value: [0.6462585 0.55932203 0.92727273 0.76106195 0.61538462 0.86842105
|
|
0.81451613 0.67391304 0.51923077 0.93518519]
|
|
|
|
mean value: 0.7320566006417716
|
|
|
|
MCC on Blind test: 0.31
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01866984 0.01328373 0.01333165 0.01289773 0.01274443 0.01260185
|
|
0.01383185 0.01282859 0.01323938 0.01250935]
|
|
|
|
mean value: 0.013593840599060058
|
|
|
|
key: score_time
|
|
value: [0.01174784 0.00923467 0.00871539 0.00866914 0.0085988 0.00864315
|
|
0.00875974 0.00851679 0.00875902 0.00885749]
|
|
|
|
mean value: 0.009050202369689942
|
|
|
|
key: test_mcc
|
|
value: [0.82575758 0.91605722 0.69084928 0.76764947 0.76764947 0.91666667
|
|
0.74242424 1. 0.91287093 0.75592895]
|
|
|
|
mean value: 0.8295853811736139
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91304348 0.95652174 0.82608696 0.86956522 0.86956522 0.95652174
|
|
0.86956522 1. 0.95454545 0.86363636]
|
|
|
|
mean value: 0.9079051383399209
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.95238095 0.77777778 0.88 0.85714286 0.95652174
|
|
0.86956522 1. 0.95238095 0.84210526]
|
|
|
|
mean value: 0.8996965668453083
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.90909091 1. 1. 0.78571429 1. 1.
|
|
0.90909091 1. 1. 1. ]
|
|
|
|
mean value: 0.9603896103896103
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.90909091 0.63636364 1. 0.75 0.91666667
|
|
0.83333333 1. 0.90909091 0.72727273]
|
|
|
|
mean value: 0.8590909090909091
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.91287879 0.95454545 0.81818182 0.875 0.875 0.95833333
|
|
0.87121212 1. 0.95454545 0.86363636]
|
|
|
|
mean value: 0.9083333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.90909091 0.63636364 0.78571429 0.75 0.91666667
|
|
0.76923077 1. 0.90909091 0.72727273]
|
|
|
|
mean value: 0.8236763236763237
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.10137177 0.09971786 0.09696341 0.09599876 0.10285091 0.09974742
|
|
0.10015845 0.10240602 0.10212231 0.09918237]
|
|
|
|
mean value: 0.10005192756652832
|
|
|
|
key: score_time
|
|
value: [0.01733947 0.0176208 0.01726961 0.01758814 0.01826119 0.01933861
|
|
0.01860666 0.01895905 0.0190897 0.01898289]
|
|
|
|
mean value: 0.0183056116104126
|
|
|
|
key: test_mcc
|
|
value: [0.74242424 0.91666667 0.65909298 0.39393939 0.74047959 0.56490196
|
|
0.76277007 1. 0.73029674 0.54772256]
|
|
|
|
mean value: 0.7058294203018629
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.86956522 0.95652174 0.82608696 0.69565217 0.86956522 0.7826087
|
|
0.86956522 1. 0.86363636 0.77272727]
|
|
|
|
mean value: 0.850592885375494
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.95652174 0.8 0.69565217 0.88 0.8
|
|
0.88888889 1. 0.86956522 0.76190476]
|
|
|
|
mean value: 0.8522097998619738
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.91666667 0.88888889 0.66666667 0.84615385 0.76923077
|
|
0.8 1. 0.83333333 0.8 ]
|
|
|
|
mean value: 0.8354273504273504
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.90909091 1. 0.72727273 0.72727273 0.91666667 0.83333333
|
|
1. 1. 0.90909091 0.72727273]
|
|
|
|
mean value: 0.875
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.87121212 0.95833333 0.8219697 0.6969697 0.86742424 0.78030303
|
|
0.86363636 1. 0.86363636 0.77272727]
|
|
|
|
mean value: 0.8496212121212121
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.91666667 0.66666667 0.53333333 0.78571429 0.66666667
|
|
0.8 1. 0.76923077 0.61538462]
|
|
|
|
mean value: 0.7522893772893773
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01011992 0.01058984 0.01019692 0.00972962 0.01009941 0.01027632
|
|
0.01017356 0.01005864 0.0098815 0.0098474 ]
|
|
|
|
mean value: 0.010097312927246093
|
|
|
|
key: score_time
|
|
value: [0.00989771 0.00945807 0.00941896 0.00952983 0.00942016 0.00951624
|
|
0.00943208 0.00933743 0.00939727 0.00861669]
|
|
|
|
mean value: 0.00940244197845459
|
|
|
|
key: test_mcc
|
|
value: [0.47727273 0.82575758 0.56490196 0.30240737 0.44411739 0.66414149
|
|
0.39393939 0.66414149 0.29277002 0.46225016]
|
|
|
|
mean value: 0.5091699576165252
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.73913043 0.91304348 0.7826087 0.65217391 0.69565217 0.82608696
|
|
0.69565217 0.82608696 0.63636364 0.72727273]
|
|
|
|
mean value: 0.7494071146245059
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.90909091 0.76190476 0.6 0.63157895 0.81818182
|
|
0.69565217 0.81818182 0.55555556 0.7 ]
|
|
|
|
mean value: 0.7217418711469055
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.90909091 0.8 0.66666667 0.85714286 0.9
|
|
0.72727273 0.9 0.71428571 0.77777778]
|
|
|
|
mean value: 0.797950937950938
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.90909091 0.72727273 0.54545455 0.5 0.75
|
|
0.66666667 0.75 0.45454545 0.63636364]
|
|
|
|
mean value: 0.6666666666666666
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.73863636 0.91287879 0.78030303 0.64772727 0.70454545 0.82954545
|
|
0.6969697 0.82954545 0.63636364 0.72727273]
|
|
|
|
mean value: 0.7503787878787879
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.83333333 0.61538462 0.42857143 0.46153846 0.69230769
|
|
0.53333333 0.69230769 0.38461538 0.53846154]
|
|
|
|
mean value: 0.5751282051282052
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.31818891 1.31016517 1.28691626 1.37142372 1.3797493 1.26361561
|
|
1.31167197 1.2901237 1.27162528 1.29103398]
|
|
|
|
mean value: 1.3094513893127442
|
|
|
|
key: score_time
|
|
value: [0.09341335 0.08867025 0.09690428 0.09699655 0.09698176 0.08863115
|
|
0.09124899 0.0938971 0.09380794 0.09228921]
|
|
|
|
mean value: 0.09328405857086182
|
|
|
|
key: test_mcc
|
|
value: [0.58002308 0.91666667 0.91605722 0.47727273 0.76764947 0.65151515
|
|
0.91605722 0.91666667 0.81818182 0.83205029]
|
|
|
|
mean value: 0.7792140323001392
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.7826087 0.95652174 0.95652174 0.73913043 0.86956522 0.82608696
|
|
0.95652174 0.95652174 0.90909091 0.90909091]
|
|
|
|
mean value: 0.8861660079051383
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.95652174 0.95238095 0.72727273 0.85714286 0.83333333
|
|
0.96 0.95652174 0.90909091 0.9 ]
|
|
|
|
mean value: 0.8789106362744806
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.875 0.91666667 1. 0.72727273 1. 0.83333333
|
|
0.92307692 1. 0.90909091 1. ]
|
|
|
|
mean value: 0.918444055944056
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.63636364 1. 0.90909091 0.72727273 0.75 0.83333333
|
|
1. 0.91666667 0.90909091 0.81818182]
|
|
|
|
mean value: 0.85
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.77651515 0.95833333 0.95454545 0.73863636 0.875 0.82575758
|
|
0.95454545 0.95833333 0.90909091 0.90909091]
|
|
|
|
mean value: 0.8859848484848485
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.91666667 0.90909091 0.57142857 0.75 0.71428571
|
|
0.92307692 0.91666667 0.83333333 0.81818182]
|
|
|
|
mean value: 0.7936063936063936
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...05', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.92155814 0.86642456 0.9216938 0.91805387 0.88870335 0.92368603
|
|
0.926301 0.83419299 0.89840913 0.83216953]
|
|
|
|
mean value: 0.8931192398071289
|
|
|
|
key: score_time
|
|
value: [0.24260831 0.20261288 0.24647403 0.1979568 0.2496202 0.20900178
|
|
0.22739148 0.21628428 0.12800908 0.18758345]
|
|
|
|
mean value: 0.21075422763824464
|
|
|
|
key: test_mcc
|
|
value: [0.56490196 0.83971912 0.82575758 0.47727273 0.74242424 0.66414149
|
|
0.65909298 0.65151515 0.64715023 0.63636364]
|
|
|
|
mean value: 0.6708339110699807
|
|
|
|
key: train_mcc
|
|
value: [0.96097468 0.9516192 0.96170013 0.98048734 0.9707786 0.9707786
|
|
0.95163291 0.94219063 0.94245853 0.9613463 ]
|
|
|
|
mean value: 0.9593966922193641
|
|
|
|
key: test_accuracy
|
|
value: [0.7826087 0.91304348 0.91304348 0.73913043 0.86956522 0.82608696
|
|
0.82608696 0.82608696 0.81818182 0.81818182]
|
|
|
|
mean value: 0.833201581027668
|
|
|
|
key: train_accuracy
|
|
value: [0.9804878 0.97560976 0.9804878 0.9902439 0.98536585 0.98536585
|
|
0.97560976 0.97073171 0.97087379 0.98058252]
|
|
|
|
mean value: 0.9795358749704002
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.91666667 0.90909091 0.72727273 0.86956522 0.81818182
|
|
0.84615385 0.83333333 0.83333333 0.81818182]
|
|
|
|
mean value: 0.8333684431510519
|
|
|
|
key: train_fscore
|
|
value: [0.98058252 0.97607656 0.98095238 0.99029126 0.98536585 0.98536585
|
|
0.97584541 0.97115385 0.97142857 0.98076923]
|
|
|
|
mean value: 0.9797831488680813
|
|
|
|
key: test_precision
|
|
value: [0.8 0.84615385 0.90909091 0.72727273 0.90909091 0.9
|
|
0.78571429 0.83333333 0.76923077 0.81818182]
|
|
|
|
mean value: 0.8298068598068599
|
|
|
|
key: train_precision /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
value: [0.98058252 0.96226415 0.96261682 0.99029126 0.98058252 0.98058252
|
|
0.96190476 0.95283019 0.95327103 0.97142857]
|
|
|
|
mean value: 0.9696354358374721
|
|
|
|
key: test_recall
|
|
value: [0.72727273 1. 0.90909091 0.72727273 0.83333333 0.75
|
|
0.91666667 0.83333333 0.90909091 0.81818182]
|
|
|
|
mean value: 0.8424242424242424
|
|
|
|
key: train_recall
|
|
value: [0.98058252 0.99029126 1. 0.99029126 0.99019608 0.99019608
|
|
0.99019608 0.99019608 0.99029126 0.99029126]
|
|
|
|
mean value: 0.9902531886541024
|
|
|
|
key: test_roc_auc
|
|
value: [0.78030303 0.91666667 0.91287879 0.73863636 0.87121212 0.82954545
|
|
0.8219697 0.82575758 0.81818182 0.81818182]
|
|
|
|
mean value: 0.8333333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.98048734 0.97553779 0.98039216 0.99024367 0.9853893 0.9853893
|
|
0.97568056 0.97082619 0.97087379 0.98058252]
|
|
|
|
mean value: 0.9795402627070247
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.84615385 0.83333333 0.57142857 0.76923077 0.69230769
|
|
0.73333333 0.71428571 0.71428571 0.69230769]
|
|
|
|
mean value: 0.7182051282051282
|
|
|
|
key: train_jcc
|
|
value: [0.96190476 0.95327103 0.96261682 0.98076923 0.97115385 0.97115385
|
|
0.95283019 0.94392523 0.94444444 0.96226415]
|
|
|
|
mean value: 0.9604333553160921
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02152944 0.00897455 0.00889492 0.0088706 0.00894356 0.0089643
|
|
0.00907016 0.00900245 0.00898051 0.00887012]
|
|
|
|
mean value: 0.010210061073303222
|
|
|
|
key: score_time
|
|
value: [0.01050448 0.00858045 0.00870132 0.00862598 0.00875401 0.00850987
|
|
0.00866985 0.00858259 0.00861263 0.00853896]
|
|
|
|
mean value: 0.008808016777038574
|
|
|
|
key: test_mcc
|
|
value: [0.21969697 0.55048188 0.22407133 0.21452908 0.3030303 0.3030303
|
|
0.33371191 0.39393939 0.09090909 0.32539569]
|
|
|
|
mean value: 0.29587959510446155
|
|
|
|
key: train_mcc
|
|
value: [0.44146616 0.44911432 0.45709726 0.49637007 0.4861007 0.48652841
|
|
0.43786483 0.44832571 0.49218702 0.50892419]
|
|
|
|
mean value: 0.4703978666309494
|
|
|
|
key: test_accuracy
|
|
value: [0.60869565 0.73913043 0.60869565 0.60869565 0.65217391 0.65217391
|
|
0.65217391 0.69565217 0.54545455 0.63636364]
|
|
|
|
mean value: 0.6399209486166008
|
|
|
|
key: train_accuracy
|
|
value: [0.71707317 0.72195122 0.72682927 0.74634146 0.73658537 0.74146341
|
|
0.71707317 0.72195122 0.74271845 0.75242718]
|
|
|
|
mean value: 0.7324413923750888
|
|
|
|
key: test_fscore
|
|
value: [0.60869565 0.625 0.47058824 0.52631579 0.66666667 0.66666667
|
|
0.6 0.69565217 0.54545455 0.5 ]
|
|
|
|
mean value: 0.5905039729642637
|
|
|
|
key: train_fscore
|
|
value: [0.69148936 0.70157068 0.71134021 0.73195876 0.7 0.72251309
|
|
0.69473684 0.6984127 0.71957672 0.7357513 ]
|
|
|
|
mean value: 0.7107349655839269
|
|
|
|
key: test_precision
|
|
value: [0.58333333 1. 0.66666667 0.625 0.66666667 0.66666667
|
|
0.75 0.72727273 0.54545455 0.8 ]
|
|
|
|
mean value: 0.7031060606060606
|
|
|
|
key: train_precision
|
|
value: [0.76470588 0.76136364 0.75824176 0.78021978 0.80769231 0.7752809
|
|
0.75 0.75862069 0.79069767 0.78888889]
|
|
|
|
mean value: 0.7735711516709494
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.45454545 0.36363636 0.45454545 0.66666667 0.66666667
|
|
0.5 0.66666667 0.54545455 0.36363636]
|
|
|
|
mean value: 0.5318181818181817
|
|
|
|
key: train_recall
|
|
value: [0.63106796 0.65048544 0.66990291 0.68932039 0.61764706 0.67647059
|
|
0.64705882 0.64705882 0.66019417 0.68932039]
|
|
|
|
mean value: 0.657852655625357
|
|
|
|
key: test_roc_auc
|
|
value: [0.60984848 0.72727273 0.59848485 0.60227273 0.65151515 0.65151515
|
|
0.65909091 0.6969697 0.54545455 0.63636364]
|
|
|
|
mean value: 0.6378787878787878
|
|
|
|
key: train_roc_auc
|
|
value: [0.71749476 0.72230154 0.72710832 0.74662098 0.736008 0.74114792
|
|
0.7167333 0.72158766 0.74271845 0.75242718]
|
|
|
|
mean value: 0.732414810584428
|
|
|
|
key: test_jcc
|
|
value: [0.4375 0.45454545 0.30769231 0.35714286 0.5 0.5
|
|
0.42857143 0.53333333 0.375 0.33333333]
|
|
|
|
mean value: 0.42271187146187145
|
|
|
|
key: train_jcc
|
|
value: [0.52845528 0.54032258 0.552 0.57723577 0.53846154 0.56557377
|
|
0.53225806 0.53658537 0.56198347 0.58196721]
|
|
|
|
mean value: 0.5514843061067994
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'Z...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.0944972 0.05067468 0.04957175 0.05115271 0.05678248 0.05602765
|
|
0.05684161 0.07069731 0.04913449 0.06043005]
|
|
|
|
mean value: 0.05958099365234375
|
|
|
|
key: score_time
|
|
value: [0.01044273 0.01050806 0.01055908 0.01056576 0.01026511 0.0102632
|
|
0.01027846 0.01120543 0.0102067 0.01039171]
|
|
|
|
mean value: 0.010468626022338867
|
|
|
|
key: test_mcc
|
|
value: [0.74047959 1. 0.91605722 0.6992059 0.83971912 0.83971912
|
|
0.91605722 0.91666667 1. 0.91287093]
|
|
|
|
mean value: 0.8780775779868542
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.86956522 1. 0.95652174 0.82608696 0.91304348 0.91304348
|
|
0.95652174 0.95652174 1. 0.95454545]
|
|
|
|
mean value: 0.9345849802371542
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 1. 0.95238095 0.84615385 0.90909091 0.90909091
|
|
0.96 0.95652174 1. 0.95238095]
|
|
|
|
mean value: 0.9342762165370861
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9 1. 1. 0.73333333 1. 1.
|
|
0.92307692 1. 1. 1. ]
|
|
|
|
mean value: 0.9556410256410256
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.81818182 1. 0.90909091 1. 0.83333333 0.83333333
|
|
1. 0.91666667 1. 0.90909091]
|
|
|
|
mean value: 0.921969696969697
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86742424 1. 0.95454545 0.83333333 0.91666667 0.91666667
|
|
0.95454545 0.95833333 1. 0.95454545]
|
|
|
|
mean value: 0.9356060606060607
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 1. 0.90909091 0.73333333 0.83333333 0.83333333
|
|
0.92307692 0.91666667 1. 0.90909091]
|
|
|
|
mean value: 0.8807925407925408
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.53
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0328002 0.05474067 0.06001735 0.05796957 0.03843188 0.02583218
|
|
0.05591583 0.06007648 0.05593419 0.05865741]
|
|
|
|
mean value: 0.050037574768066403
|
|
|
|
key: score_time
|
|
value: [0.02213311 0.02232766 0.02493095 0.0224731 0.01203299 0.01206088
|
|
0.02462769 0.0249722 0.02335501 0.01984525]
|
|
|
|
mean value: 0.020875883102416993
|
|
|
|
key: test_mcc
|
|
value: [0.56490196 0.58002308 0.91666667 0.47727273 0.5164589 0.48856385
|
|
0.56490196 0.58930667 0.63636364 0.45454545]
|
|
|
|
mean value: 0.5789004892930631
|
|
|
|
key: train_mcc
|
|
value: [0.91223227 0.96097468 0.91223227 0.93174679 0.97115114 0.95126131
|
|
0.95163291 0.93175328 0.94174757 0.94192516]
|
|
|
|
mean value: 0.9406657392104807
|
|
|
|
key: test_accuracy
|
|
value: [0.7826087 0.7826087 0.95652174 0.73913043 0.73913043 0.73913043
|
|
0.7826087 0.7826087 0.81818182 0.72727273]
|
|
|
|
mean value: 0.7849802371541502
|
|
|
|
key: train_accuracy
|
|
value: [0.95609756 0.9804878 0.95609756 0.96585366 0.98536585 0.97560976
|
|
0.97560976 0.96585366 0.97087379 0.97087379]
|
|
|
|
mean value: 0.9702723182571631
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.73684211 0.95652174 0.72727273 0.7 0.72727273
|
|
0.8 0.76190476 0.81818182 0.72727273]
|
|
|
|
mean value: 0.7717173368203116
|
|
|
|
key: train_fscore
|
|
value: [0.95652174 0.98058252 0.95652174 0.96618357 0.98550725 0.97536946
|
|
0.97584541 0.96585366 0.97087379 0.97115385]
|
|
|
|
mean value: 0.9704412983643049
|
|
|
|
key: test_precision
|
|
value: [0.8 0.875 0.91666667 0.72727273 0.875 0.8
|
|
0.76923077 0.88888889 0.81818182 0.72727273]
|
|
|
|
mean value: 0.8197513597513597
|
|
|
|
key: train_precision
|
|
value: [0.95192308 0.98058252 0.95192308 0.96153846 0.97142857 0.98019802
|
|
0.96190476 0.96116505 0.97087379 0.96190476]
|
|
|
|
mean value: 0.9653442089647992
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.63636364 1. 0.72727273 0.58333333 0.66666667
|
|
0.83333333 0.66666667 0.81818182 0.72727273]
|
|
|
|
mean value: 0.7386363636363636
|
|
|
|
key: train_recall
|
|
value: [0.96116505 0.98058252 0.96116505 0.97087379 1. 0.97058824
|
|
0.99019608 0.97058824 0.97087379 0.98058252]
|
|
|
|
mean value: 0.975661526746621
|
|
|
|
key: test_roc_auc
|
|
value: [0.78030303 0.77651515 0.95833333 0.73863636 0.74621212 0.74242424
|
|
0.78030303 0.78787879 0.81818182 0.72727273]
|
|
|
|
mean value: 0.7856060606060605
|
|
|
|
key: train_roc_auc
|
|
value: [0.95607272 0.98048734 0.95607272 0.96582905 0.98543689 0.97558538
|
|
0.97568056 0.96587664 0.97087379 0.97087379]
|
|
|
|
mean value: 0.970278888254331
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.58333333 0.91666667 0.57142857 0.53846154 0.57142857
|
|
0.66666667 0.61538462 0.69230769 0.57142857]
|
|
|
|
mean value: 0.6342490842490842
|
|
|
|
key: train_jcc
|
|
value: [0.91666667 0.96190476 0.91666667 0.93457944 0.97142857 0.95192308
|
|
0.95283019 0.93396226 0.94339623 0.94392523]
|
|
|
|
mean value: 0.9427283095732223
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01869988 0.0104475 0.01018763 0.01002216 0.00952911 0.01009011
|
|
0.01007676 0.01010776 0.00896454 0.01016378]
|
|
|
|
mean value: 0.010828924179077149
|
|
|
|
key: score_time
|
|
value: [0.00920248 0.00986409 0.00962067 0.0095036 0.00955296 0.00947595
|
|
0.00958252 0.00949979 0.00944614 0.00946665]
|
|
|
|
mean value: 0.009521484375
|
|
|
|
key: test_mcc
|
|
value: [0.06579517 0.47727273 0.56490196 0.21969697 0.22407133 0.39727608
|
|
0.56818182 0.38932432 0.54772256 0.46225016]
|
|
|
|
mean value: 0.3916493092720405
|
|
|
|
key: train_mcc
|
|
value: [0.48780456 0.42066716 0.48336719 0.46806514 0.42940367 0.42714207
|
|
0.40668817 0.42940367 0.41216105 0.42138641]
|
|
|
|
mean value: 0.43860890996498425
|
|
|
|
key: test_accuracy
|
|
value: [0.52173913 0.73913043 0.7826087 0.60869565 0.60869565 0.69565217
|
|
0.7826087 0.69565217 0.77272727 0.72727273]
|
|
|
|
mean value: 0.6934782608695652
|
|
|
|
key: train_accuracy
|
|
value: [0.74146341 0.70731707 0.74146341 0.73170732 0.71219512 0.71219512
|
|
0.70243902 0.71219512 0.7038835 0.70873786]
|
|
|
|
mean value: 0.7173596968979399
|
|
|
|
key: test_fscore
|
|
value: [0.59259259 0.72727273 0.76190476 0.60869565 0.68965517 0.74074074
|
|
0.7826087 0.72 0.7826087 0.75 ]
|
|
|
|
mean value: 0.7156079038402876
|
|
|
|
key: train_fscore
|
|
value: [0.760181 0.73214286 0.74881517 0.75113122 0.73059361 0.7255814
|
|
0.71361502 0.73059361 0.7239819 0.72727273]
|
|
|
|
mean value: 0.7343908501374309
|
|
|
|
key: test_precision
|
|
value: [0.5 0.72727273 0.8 0.58333333 0.58823529 0.66666667
|
|
0.81818182 0.69230769 0.75 0.69230769]
|
|
|
|
mean value: 0.6818305224187577
|
|
|
|
key: train_precision
|
|
value: [0.71186441 0.67768595 0.73148148 0.70338983 0.68376068 0.69026549
|
|
0.68468468 0.68376068 0.6779661 0.68376068]
|
|
|
|
mean value: 0.6928619993570155
|
|
|
|
key: test_recall
|
|
value: [0.72727273 0.72727273 0.72727273 0.63636364 0.83333333 0.83333333
|
|
0.75 0.75 0.81818182 0.81818182]
|
|
|
|
mean value: 0.7621212121212122
|
|
|
|
key: train_recall
|
|
value: [0.81553398 0.7961165 0.76699029 0.80582524 0.78431373 0.76470588
|
|
0.74509804 0.78431373 0.77669903 0.77669903]
|
|
|
|
mean value: 0.7816295450218922
|
|
|
|
key: test_roc_auc
|
|
value: [0.53030303 0.73863636 0.78030303 0.60984848 0.59848485 0.68939394
|
|
0.78409091 0.69318182 0.77272727 0.72727273]
|
|
|
|
mean value: 0.6924242424242424
|
|
|
|
key: train_roc_auc
|
|
value: [0.74110032 0.70688178 0.74133828 0.73134399 0.71254521 0.71245003
|
|
0.70264611 0.71254521 0.7038835 0.70873786]
|
|
|
|
mean value: 0.7173472301541977
|
|
|
|
key: test_jcc
|
|
value: [0.42105263 0.57142857 0.61538462 0.4375 0.52631579 0.58823529
|
|
0.64285714 0.5625 0.64285714 0.6 ]
|
|
|
|
mean value: 0.5608131187697751
|
|
|
|
key: train_jcc
|
|
value: [0.61313869 0.57746479 0.59848485 0.60144928 0.57553957 0.56934307
|
|
0.55474453 0.57553957 0.56737589 0.57142857]
|
|
|
|
mean value: 0.5804508784595866
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01363993 0.01537251 0.01528311 0.01538348 0.01859713 0.01800203
|
|
0.01694918 0.01621366 0.01610088 0.01708269]
|
|
|
|
mean value: 0.016262459754943847
|
|
|
|
key: score_time
|
|
value: [0.00964856 0.0117321 0.01158285 0.01170731 0.01168036 0.01170659
|
|
0.01175737 0.0116291 0.01162767 0.01166821]
|
|
|
|
mean value: 0.011474013328552246
|
|
|
|
key: test_mcc
|
|
value: [0.69084928 0.22268089 0.50168817 0.31252706 0.50460839 0.82575758
|
|
0.83971912 0.74047959 0.39735971 0.54232614]
|
|
|
|
mean value: 0.5577995920530833
|
|
|
|
key: train_mcc
|
|
value: [0.70109302 0.51269395 0.79525817 0.73218681 0.58583388 0.88020643
|
|
0.75526392 0.86303792 0.57361333 0.82977382]
|
|
|
|
mean value: 0.7228961254133855
|
|
|
|
key: test_accuracy
|
|
value: [0.82608696 0.56521739 0.73913043 0.65217391 0.69565217 0.91304348
|
|
0.91304348 0.86956522 0.63636364 0.72727273]
|
|
|
|
mean value: 0.7537549407114624
|
|
|
|
key: train_accuracy
|
|
value: [0.82926829 0.70731707 0.89268293 0.84878049 0.75609756 0.93658537
|
|
0.86341463 0.92682927 0.74757282 0.90776699]
|
|
|
|
mean value: 0.841631541558134
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.16666667 0.66666667 0.55555556 0.58823529 0.91666667
|
|
0.90909091 0.88 0.42857143 0.625 ]
|
|
|
|
mean value: 0.6514230965113318
|
|
|
|
key: train_fscore
|
|
value: [0.79532164 0.5890411 0.88421053 0.82285714 0.67532468 0.93193717
|
|
0.84090909 0.93150685 0.66233766 0.89839572]
|
|
|
|
mean value: 0.8031841575076744
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.85714286 0.71428571 1. 0.91666667
|
|
1. 0.84615385 1. 1. ]
|
|
|
|
mean value: 0.9334249084249084
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.96551724 1. 1. 1.
|
|
1. 0.87179487 1. 1. ]
|
|
|
|
mean value: 0.9837312113174183
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.09090909 0.54545455 0.45454545 0.41666667 0.91666667
|
|
0.83333333 0.91666667 0.27272727 0.45454545]
|
|
|
|
mean value: 0.5537878787878788
|
|
|
|
key: train_recall
|
|
value: [0.66019417 0.41747573 0.81553398 0.69902913 0.50980392 0.87254902
|
|
0.7254902 1. 0.49514563 0.81553398]
|
|
|
|
mean value: 0.7010755758614126
|
|
|
|
key: test_roc_auc
|
|
value: [0.81818182 0.54545455 0.73106061 0.64393939 0.70833333 0.91287879
|
|
0.91666667 0.86742424 0.63636364 0.72727273]
|
|
|
|
mean value: 0.7507575757575757
|
|
|
|
key: train_roc_auc
|
|
value: [0.83009709 0.70873786 0.89306111 0.84951456 0.75490196 0.93627451
|
|
0.8627451 0.92718447 0.74757282 0.90776699]
|
|
|
|
mean value: 0.841785646297354
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.09090909 0.5 0.38461538 0.41666667 0.84615385
|
|
0.83333333 0.78571429 0.27272727 0.45454545]
|
|
|
|
mean value: 0.5221028971028971
|
|
|
|
key: train_jcc
|
|
value: [0.66019417 0.41747573 0.79245283 0.69902913 0.50980392 0.87254902
|
|
0.7254902 0.87179487 0.49514563 0.81553398]
|
|
|
|
mean value: 0.6859469480015152
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01696992 0.01531029 0.01493359 0.01531291 0.01478624 0.01416588
|
|
0.01495624 0.01490998 0.01478863 0.01566005]
|
|
|
|
mean value: 0.01517937183380127
|
|
|
|
key: score_time
|
|
value: [0.01180148 0.01169944 0.01167941 0.01164746 0.01165843 0.01172638
|
|
0.01163912 0.01172733 0.01157904 0.01167846]
|
|
|
|
mean value: 0.01168365478515625
|
|
|
|
key: test_mcc
|
|
value: [0.39393939 0.6992059 0.32232919 0.56879646 0.76764947 0.82575758
|
|
0.76764947 0.91666667 0.64715023 0.23570226]
|
|
|
|
mean value: 0.6144846616230837
|
|
|
|
key: train_mcc
|
|
value: [0.87817847 0.81217608 0.3623663 0.70796649 0.92194936 0.86485629
|
|
0.66933669 0.8742382 0.85045167 0.56613852]
|
|
|
|
mean value: 0.7507658057776959
|
|
|
|
key: test_accuracy
|
|
value: [0.69565217 0.82608696 0.60869565 0.73913043 0.86956522 0.91304348
|
|
0.86956522 0.95652174 0.81818182 0.59090909]
|
|
|
|
mean value: 0.7887351778656126
|
|
|
|
key: train_accuracy
|
|
value: [0.93658537 0.89756098 0.61463415 0.83414634 0.96097561 0.93170732
|
|
0.8097561 0.93658537 0.9223301 0.74271845]
|
|
|
|
mean value: 0.8586999763201516
|
|
|
|
key: test_fscore
|
|
value: [0.69565217 0.84615385 0.30769231 0.78571429 0.85714286 0.91666667
|
|
0.85714286 0.95652174 0.8 0.68965517]
|
|
|
|
mean value: 0.7712341905970092
|
|
|
|
key: train_fscore
|
|
value: [0.94009217 0.90748899 0.37795276 0.85833333 0.96078431 0.92929293
|
|
0.76363636 0.93779904 0.91752577 0.7953668 ]
|
|
|
|
mean value: 0.8388272460201259
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.73333333 1. 0.64705882 1. 0.91666667
|
|
1. 1. 0.88888889 0.55555556]
|
|
|
|
mean value: 0.8408169934640523
|
|
|
|
key: train_precision
|
|
value: [0.89473684 0.83064516 1. 0.75182482 0.96078431 0.95833333
|
|
1. 0.91588785 0.97802198 0.66025641]
|
|
|
|
mean value: 0.8950490706718336
|
|
|
|
key: test_recall
|
|
value: [0.72727273 1. 0.18181818 1. 0.75 0.91666667
|
|
0.75 0.91666667 0.72727273 0.90909091]
|
|
|
|
mean value: 0.7878787878787878
|
|
|
|
key: train_recall
|
|
value: [0.99029126 1. 0.23300971 1. 0.96078431 0.90196078
|
|
0.61764706 0.96078431 0.86407767 1. ]
|
|
|
|
mean value: 0.8528555111364935
|
|
|
|
key: test_roc_auc
|
|
value: [0.6969697 0.83333333 0.59090909 0.75 0.875 0.91287879
|
|
0.875 0.95833333 0.81818182 0.59090909]
|
|
|
|
mean value: 0.7901515151515152
|
|
|
|
key: train_roc_auc
|
|
value: [0.9363221 0.89705882 0.61650485 0.83333333 0.96097468 0.93156292
|
|
0.80882353 0.93670284 0.9223301 0.74271845]
|
|
|
|
mean value: 0.8586331620026652
|
|
|
|
key: test_jcc
|
|
value: [0.53333333 0.73333333 0.18181818 0.64705882 0.75 0.84615385
|
|
0.75 0.91666667 0.66666667 0.52631579]
|
|
|
|
mean value: 0.6551346640975124
|
|
|
|
key: train_jcc
|
|
value: [0.88695652 0.83064516 0.23300971 0.75182482 0.9245283 0.86792453
|
|
0.61764706 0.88288288 0.84761905 0.66025641]
|
|
|
|
mean value: 0.7503294439056115
|
|
|
|
MCC on Blind test: 0.2
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.12839508 0.113796 0.11646557 0.11455917 0.1184082 0.11943507
|
|
0.11837411 0.11078691 0.11046553 0.11171436]
|
|
|
|
mean value: 0.11624000072479249
|
|
|
|
key: score_time
|
|
value: [0.01480055 0.01611924 0.01634765 0.01499295 0.01620007 0.01611018
|
|
0.01495361 0.0148952 0.01476741 0.01726556]
|
|
|
|
mean value: 0.01564524173736572
|
|
|
|
key: test_mcc
|
|
value: [0.74047959 0.82575758 0.91605722 0.66414149 0.83971912 0.91666667
|
|
0.91605722 0.83971912 0.81818182 0.91287093]
|
|
|
|
mean value: 0.8389650763028634
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.86956522 0.91304348 0.95652174 0.82608696 0.91304348 0.95652174
|
|
0.95652174 0.91304348 0.90909091 0.95454545]
|
|
|
|
mean value: 0.916798418972332
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.90909091 0.95238095 0.83333333 0.90909091 0.95652174
|
|
0.96 0.90909091 0.90909091 0.95238095]
|
|
|
|
mean value: 0.9148123470732166
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9 0.90909091 1. 0.76923077 1. 1.
|
|
0.92307692 1. 0.90909091 1. ]
|
|
|
|
mean value: 0.941048951048951
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.90909091 0.90909091 0.90909091 0.83333333 0.91666667
|
|
1. 0.83333333 0.90909091 0.90909091]
|
|
|
|
mean value: 0.8946969696969697
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.86742424 0.91287879 0.95454545 0.82954545 0.91666667 0.95833333
|
|
0.95454545 0.91666667 0.90909091 0.95454545]
|
|
|
|
mean value: 0.9174242424242425
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.83333333 0.90909091 0.71428571 0.83333333 0.91666667
|
|
0.92307692 0.83333333 0.83333333 0.90909091]
|
|
|
|
mean value: 0.8455544455544456
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.01
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04284787 0.04148149 0.05290413 0.04513884 0.03626966 0.05176854
|
|
0.04080129 0.04704714 0.04275584 0.04380989]
|
|
|
|
mean value: 0.04448246955871582
|
|
|
|
key: score_time
|
|
value: [0.01655555 0.02902532 0.01787877 0.02407384 0.01867771 0.02897787
|
|
0.01784182 0.03749013 0.01835752 0.02550364]
|
|
|
|
mean value: 0.023438215255737305
|
|
|
|
key: test_mcc
|
|
value: [0.74047959 0.83743579 0.91605722 0.58930667 0.76764947 0.83971912
|
|
0.91605722 1. 1. 0.81818182]
|
|
|
|
mean value: 0.8424886910191745
|
|
|
|
key: train_mcc
|
|
value: [0.98067587 0.98067587 1. 1. 1. 1.
|
|
0.99029126 0.99029034 0.99033794 0.99033794]
|
|
|
|
mean value: 0.9922609226032173
|
|
|
|
key: test_accuracy
|
|
value: [0.86956522 0.91304348 0.95652174 0.7826087 0.86956522 0.91304348
|
|
0.95652174 1. 1. 0.90909091]
|
|
|
|
mean value: 0.9169960474308301
|
|
|
|
key: train_accuracy
|
|
value: [0.9902439 0.9902439 1. 1. 1. 1.
|
|
0.99512195 0.99512195 0.99514563 0.99514563]
|
|
|
|
mean value: 0.9961022969452995
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.9 0.95238095 0.8 0.85714286 0.90909091
|
|
0.96 1. 1. 0.90909091]
|
|
|
|
mean value: 0.9144848484848485
|
|
|
|
key: train_fscore
|
|
value: [0.99019608 0.99019608 1. 1. 1. 1.
|
|
0.99512195 0.99507389 0.99516908 0.99516908]
|
|
|
|
mean value: 0.9960926163959081
|
|
|
|
key: test_precision
|
|
value: [0.9 1. 1. 0.71428571 1. 1.
|
|
0.92307692 1. 1. 0.90909091]
|
|
|
|
mean value: 0.9446453546453546
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.99029126 1. 0.99038462 0.99038462]
|
|
|
|
mean value: 0.9971060492905153
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.81818182 0.90909091 0.90909091 0.75 0.83333333
|
|
1. 1. 1. 0.90909091]
|
|
|
|
mean value: 0.8946969696969697
|
|
|
|
key: train_recall
|
|
value: [0.98058252 0.98058252 1. 1. 1. 1.
|
|
1. 0.99019608 1. 1. ]
|
|
|
|
mean value: 0.9951361126975062
|
|
|
|
key: test_roc_auc
|
|
value: [0.86742424 0.90909091 0.95454545 0.78787879 0.875 0.91666667
|
|
0.95454545 1. 1. 0.90909091]
|
|
|
|
mean value: 0.9174242424242425
|
|
|
|
key: train_roc_auc
|
|
value: [0.99029126 0.99029126 1. 1. 1. 1.
|
|
0.99514563 0.99509804 0.99514563 0.99514563]
|
|
|
|
mean value: 0.9961117456691414
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.81818182 0.90909091 0.66666667 0.75 0.83333333
|
|
0.92307692 1. 1. 0.83333333]
|
|
|
|
mean value: 0.8483682983682984
|
|
|
|
key: train_jcc
|
|
value: [0.98058252 0.98058252 1. 1. 1. 1.
|
|
0.99029126 0.99019608 0.99038462 0.99038462]
|
|
|
|
mean value: 0.9922421619880215
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04829431 0.08197236 0.07691717 0.05902553 0.02872419 0.02841234
|
|
0.06640029 0.04778814 0.02781248 0.0365293 ]
|
|
|
|
mean value: 0.05018761157989502
|
|
|
|
key: score_time
|
|
value: [0.02258968 0.02218485 0.02187991 0.01301789 0.01300526 0.01682043
|
|
0.01924825 0.01270652 0.01270413 0.02137733]
|
|
|
|
mean value: 0.017553424835205077
|
|
|
|
key: test_mcc
|
|
value: [0.3030303 0.83743579 0.31252706 0.12406456 0.56818182 0.47727273
|
|
0.41096386 0.82575758 0.48795004 0.2773501 ]
|
|
|
|
mean value: 0.462453382427294
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.65217391 0.91304348 0.65217391 0.56521739 0.7826087 0.73913043
|
|
0.69565217 0.91304348 0.72727273 0.63636364]
|
|
|
|
mean value: 0.7276679841897233
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.63636364 0.9 0.55555556 0.5 0.7826087 0.75
|
|
0.66666667 0.91666667 0.66666667 0.6 ]
|
|
|
|
mean value: 0.6974527887571366
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.63636364 1. 0.71428571 0.55555556 0.81818182 0.75
|
|
0.77777778 0.91666667 0.85714286 0.66666667]
|
|
|
|
mean value: 0.7692640692640693
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.63636364 0.81818182 0.45454545 0.45454545 0.75 0.75
|
|
0.58333333 0.91666667 0.54545455 0.54545455]
|
|
|
|
mean value: 0.6454545454545455
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.65151515 0.90909091 0.64393939 0.56060606 0.78409091 0.73863636
|
|
0.70075758 0.91287879 0.72727273 0.63636364]
|
|
|
|
mean value: 0.7265151515151516
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.46666667 0.81818182 0.38461538 0.33333333 0.64285714 0.6
|
|
0.5 0.84615385 0.5 0.42857143]
|
|
|
|
mean value: 0.5520379620379621
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.37749887 0.35624838 0.34953141 0.35143161 0.35564804 0.35179186
|
|
0.34444571 0.35502386 0.34801006 0.35423851]
|
|
|
|
mean value: 0.35438683032989504
|
|
|
|
key: score_time
|
|
value: [0.00946093 0.00907135 0.00899053 0.00892138 0.00922036 0.00899267
|
|
0.00900412 0.0091064 0.00907969 0.0090704 ]
|
|
|
|
mean value: 0.009091782569885253
|
|
|
|
key: test_mcc
|
|
value: [0.91666667 1. 0.91605722 0.6992059 0.76764947 1.
|
|
0.91605722 1. 1. 0.91287093]
|
|
|
|
mean value: 0.912850741785816
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.95652174 1. 0.95652174 0.82608696 0.86956522 1.
|
|
0.95652174 1. 1. 0.95454545]
|
|
|
|
mean value: 0.9519762845849803
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95652174 1. 0.95238095 0.84615385 0.85714286 1.
|
|
0.96 1. 1. 0.95238095]
|
|
|
|
mean value: 0.9524580347189042
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.91666667 1. 1. 0.73333333 1. 1.
|
|
0.92307692 1. 1. 1. ]
|
|
|
|
mean value: 0.9573076923076923
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.90909091 1. 0.75 1.
|
|
1. 1. 1. 0.90909091]
|
|
|
|
mean value: 0.9568181818181818
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.95833333 1. 0.95454545 0.83333333 0.875 1.
|
|
0.95454545 1. 1. 0.95454545]
|
|
|
|
mean value: 0.953030303030303
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.91666667 1. 0.90909091 0.73333333 0.75 1.
|
|
0.92307692 1. 1. 0.90909091]
|
|
|
|
mean value: 0.9141258741258741
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01773167 0.01976418 0.03433776 0.01997304 0.01990628 0.02007365
|
|
0.02028775 0.0202179 0.02021074 0.0203495 ]
|
|
|
|
mean value: 0.021285247802734376
|
|
|
|
key: score_time
|
|
value: [0.01196408 0.014189 0.01221085 0.01400971 0.01760888 0.02596092
|
|
0.01817083 0.01999259 0.0198195 0.0188601 ]
|
|
|
|
mean value: 0.017278647422790526
|
|
|
|
key: test_mcc
|
|
value: [0.63327851 0.83971912 0.76764947 0.43929769 0.76277007 0.62050523
|
|
0.62050523 0.83743579 0.68313005 0.61237244]
|
|
|
|
mean value: 0.6816663591347039
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.7826087 0.91304348 0.86956522 0.65217391 0.86956522 0.7826087
|
|
0.7826087 0.91304348 0.81818182 0.77272727]
|
|
|
|
mean value: 0.8156126482213438
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.81481481 0.91666667 0.88 0.73333333 0.88888889 0.82758621
|
|
0.82758621 0.92307692 0.84615385 0.81481481]
|
|
|
|
mean value: 0.8472921701542391
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.6875 0.84615385 0.78571429 0.57894737 0.8 0.70588235
|
|
0.70588235 0.85714286 0.73333333 0.6875 ]
|
|
|
|
mean value: 0.7388056396647728
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.79166667 0.91666667 0.875 0.66666667 0.86363636 0.77272727
|
|
0.77272727 0.90909091 0.81818182 0.77272727]
|
|
|
|
mean value: 0.8159090909090909
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6875 0.84615385 0.78571429 0.57894737 0.8 0.70588235
|
|
0.70588235 0.85714286 0.73333333 0.6875 ]
|
|
|
|
mean value: 0.7388056396647728
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0230031 0.03526139 0.03538561 0.02961564 0.03531909 0.03540421
|
|
0.03581238 0.03637147 0.03165317 0.03724432]
|
|
|
|
mean value: 0.033507037162780764
|
|
|
|
key: score_time
|
|
value: [0.01678348 0.02226853 0.02215648 0.02117872 0.02216363 0.02219868
|
|
0.02224374 0.02224016 0.02391219 0.02239656]
|
|
|
|
mean value: 0.02175421714782715
|
|
|
|
key: test_mcc
|
|
value: [0.82575758 0.74242424 0.65909298 0.65151515 0.76764947 0.74047959
|
|
0.82575758 0.82575758 0.73029674 0.46225016]
|
|
|
|
mean value: 0.7230981074087546
|
|
|
|
key: train_mcc
|
|
value: [0.92263761 0.90259929 0.93209539 0.93209539 0.95236324 0.92213232
|
|
0.92213232 0.903143 0.92250402 0.92302639]
|
|
|
|
mean value: 0.9234728990174977
|
|
|
|
key: test_accuracy
|
|
value: [0.91304348 0.86956522 0.82608696 0.82608696 0.86956522 0.86956522
|
|
0.91304348 0.91304348 0.86363636 0.72727273]
|
|
|
|
mean value: 0.8590909090909091
|
|
|
|
key: train_accuracy
|
|
value: [0.96097561 0.95121951 0.96585366 0.96585366 0.97560976 0.96097561
|
|
0.96097561 0.95121951 0.96116505 0.96116505]
|
|
|
|
mean value: 0.9615013023916646
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.86956522 0.8 0.81818182 0.85714286 0.88
|
|
0.91666667 0.91666667 0.86956522 0.7 ]
|
|
|
|
mean value: 0.8536879352531526
|
|
|
|
key: train_fscore
|
|
value: [0.96190476 0.95192308 0.96650718 0.96650718 0.97607656 0.96116505
|
|
0.96116505 0.95192308 0.96153846 0.96190476]
|
|
|
|
mean value: 0.9620615145372428
|
|
|
|
key: test_precision
|
|
value: [0.90909091 0.83333333 0.88888889 0.81818182 1. 0.84615385
|
|
0.91666667 0.91666667 0.83333333 0.77777778]
|
|
|
|
mean value: 0.874009324009324
|
|
|
|
key: train_precision
|
|
value: [0.94392523 0.94285714 0.95283019 0.95283019 0.95327103 0.95192308
|
|
0.95192308 0.93396226 0.95238095 0.94392523]
|
|
|
|
mean value: 0.9479828385920785
|
|
|
|
key: test_recall
|
|
value: [0.90909091 0.90909091 0.72727273 0.81818182 0.75 0.91666667
|
|
0.91666667 0.91666667 0.90909091 0.63636364]
|
|
|
|
mean value: 0.8409090909090909
|
|
|
|
key: train_recall
|
|
value: [0.98058252 0.96116505 0.98058252 0.98058252 1. 0.97058824
|
|
0.97058824 0.97058824 0.97087379 0.98058252]
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_orig.py:195: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./pnca_orig.py:198: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
|
|
mean value: 0.9766133637921188
|
|
|
|
key: test_roc_auc
|
|
value: [0.91287879 0.87121212 0.8219697 0.82575758 0.875 0.86742424
|
|
0.91287879 0.91287879 0.86363636 0.72727273]
|
|
|
|
mean value: 0.859090909090909
|
|
|
|
key: train_roc_auc
|
|
value: [0.9608795 0.95117076 0.96578146 0.96578146 0.97572816 0.96102227
|
|
0.96102227 0.95131354 0.96116505 0.96116505]
|
|
|
|
mean value: 0.961502950694841
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.76923077 0.66666667 0.69230769 0.75 0.78571429
|
|
0.84615385 0.84615385 0.76923077 0.53846154]
|
|
|
|
mean value: 0.7497252747252747
|
|
|
|
key: train_jcc
|
|
value: [0.9266055 0.90825688 0.93518519 0.93518519 0.95327103 0.92523364
|
|
0.92523364 0.90825688 0.92592593 0.9266055 ]
|
|
|
|
mean value: 0.9269759384695507
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'rsa',
|
|
'kd_values',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=166)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.31841111 0.24217796 0.23903775 0.24415827 0.23731065 0.22925878
|
|
0.26342988 0.2629621 0.31826568 0.26236701]
|
|
|
|
mean value: 0.26173791885375974
|
|
|
|
key: score_time
|
|
value: [0.02236819 0.02346969 0.02272415 0.02223444 0.0223434 0.02419353
|
|
0.02253866 0.025352 0.02559161 0.02376676]
|
|
|
|
mean value: 0.023458242416381836
|
|
|
|
key: test_mcc
|
|
value: [0.65151515 0.56490196 0.65909298 0.65151515 0.66414149 0.74047959
|
|
0.74242424 0.82575758 0.63636364 0.36514837]
|
|
|
|
mean value: 0.6501340144993133
|
|
|
|
key: train_mcc
|
|
value: [0.92211753 0.92263761 0.93209539 0.93209539 0.9707786 0.92213232
|
|
0.92213232 0.903143 0.95150116 0.94192516]
|
|
|
|
mean value: 0.932055849012585
|
|
|
|
key: test_accuracy
|
|
value: [0.82608696 0.7826087 0.82608696 0.82608696 0.82608696 0.86956522
|
|
0.86956522 0.91304348 0.81818182 0.68181818]
|
|
|
|
mean value: 0.8239130434782609
|
|
|
|
key: train_accuracy
|
|
value: [0.96097561 0.96097561 0.96585366 0.96585366 0.98536585 0.96097561
|
|
0.96097561 0.95121951 0.97572816 0.97087379]
|
|
|
|
mean value: 0.9658797063698792
|
|
|
|
key: test_fscore
|
|
value: [0.81818182 0.76190476 0.8 0.81818182 0.81818182 0.88
|
|
0.86956522 0.91666667 0.81818182 0.66666667]
|
|
|
|
mean value: 0.8167530585356673
|
|
|
|
key: train_fscore
|
|
value: [0.96153846 0.96190476 0.96650718 0.96650718 0.98536585 0.96116505
|
|
0.96116505 0.95192308 0.97584541 0.97115385]
|
|
|
|
mean value: 0.9663075861961067
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.8 0.88888889 0.81818182 0.9 0.84615385
|
|
0.90909091 0.91666667 0.81818182 0.7 ]
|
|
|
|
mean value: 0.8415345765345765
|
|
|
|
key: train_precision
|
|
value: [0.95238095 0.94392523 0.95283019 0.95283019 0.98058252 0.95192308
|
|
0.95192308 0.93396226 0.97115385 0.96190476]
|
|
|
|
mean value: 0.9553416113711852
|
|
|
|
key: test_recall
|
|
value: [0.81818182 0.72727273 0.72727273 0.81818182 0.75 0.91666667
|
|
0.83333333 0.91666667 0.81818182 0.63636364]
|
|
|
|
mean value: 0.7962121212121213
|
|
|
|
key: train_recall
|
|
value: [0.97087379 0.98058252 0.98058252 0.98058252 0.99019608 0.97058824
|
|
0.97058824 0.97058824 0.98058252 0.98058252]
|
|
|
|
mean value: 0.9775747192080716
|
|
|
|
key: test_roc_auc
|
|
value: [0.82575758 0.78030303 0.8219697 0.82575758 0.82954545 0.86742424
|
|
0.87121212 0.91287879 0.81818182 0.68181818]
|
|
|
|
mean value: 0.8234848484848485
|
|
|
|
key: train_roc_auc
|
|
value: [0.96092709 0.9608795 0.96578146 0.96578146 0.9853893 0.96102227
|
|
0.96102227 0.95131354 0.97572816 0.97087379]
|
|
|
|
mean value: 0.965871882733676
|
|
|
|
key: test_jcc
|
|
value: [0.69230769 0.61538462 0.66666667 0.69230769 0.69230769 0.78571429
|
|
0.76923077 0.84615385 0.69230769 0.5 ]
|
|
|
|
mean value: 0.6952380952380952
|
|
|
|
key: train_jcc
|
|
value: [0.92592593 0.9266055 0.93518519 0.93518519 0.97115385 0.92523364
|
|
0.92523364 0.90825688 0.95283019 0.94392523]
|
|
|
|
mean value: 0.9349535239814974
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.55
|