19713 lines
980 KiB
Text
19713 lines
980 KiB
Text
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_cd_7030.py:548: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 858
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 858
|
|
ncols: 269
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 244
|
|
log10_or_mychisq 244
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 168
|
|
No. of categorical features: 7
|
|
|
|
PASS: x_features has no target variable
|
|
|
|
No. of columns for x_features: 175
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data with stratification [COMPLETE data]: 70/30
|
|
Original data size: (858, 175)
|
|
Train data size: (574, 175)
|
|
Test data size: (284, 175)
|
|
y_train numbers: Counter({0: 494, 1: 80})
|
|
y_train ratio: 6.175
|
|
|
|
y_test_numbers: Counter({0: 244, 1: 40})
|
|
y_test ratio: 6.1
|
|
-------------------------------------------------------------
|
|
|
|
index: 0
|
|
ind: 1
|
|
|
|
Mask count check: True
|
|
|
|
index: 1
|
|
ind: 2
|
|
|
|
Mask count check: False
|
|
Original Data
|
|
Counter({0: 494, 1: 80}) Data dim: (574, 175)
|
|
|
|
Simple Random OverSampling
|
|
Counter({0: 494, 1: 494})
|
|
(988, 175)
|
|
|
|
Simple Random UnderSampling
|
|
Counter({0: 80, 1: 80})
|
|
(160, 175)
|
|
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 494, 1: 494})
|
|
(988, 175)
|
|
|
|
SMOTE_NC OverSampling
|
|
Counter({0: 494, 1: 494})
|
|
(988, 175)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis [COMPLETE DATA]: 70/30 split
|
|
Gene name: embB
|
|
Drug name: ethambutol
|
|
|
|
Output directory: /home/tanu/git/Data/ethambutol/output/ml/tts_cd_7030/
|
|
|
|
Sanity checks:
|
|
Total input features: 175
|
|
|
|
Training data size: (574, 175)
|
|
Test data size: (284, 175)
|
|
|
|
Target feature numbers (training data): Counter({0: 494, 1: 80})
|
|
Target features ratio (training data: 6.175
|
|
|
|
Target feature numbers (test data): Counter({0: 244, 1: 40})
|
|
Target features ratio (test data): 6.1
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 36
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'mcsm_ppi2_affinity', 'interface_dist']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
These are:
|
|
['ALTS910101', 'AZAE970101', 'AZAE970102', 'BASU010101', 'BENS940101', 'BENS940102', 'BENS940103', 'BENS940104', 'BETM990101', 'BLAJ010101', 'BONM030101', 'BONM030102', 'BONM030103', 'BONM030104', 'BONM030105', 'BONM030106', 'BRYS930101', 'CROG050101', 'CSEM940101', 'DAYM780301', 'DAYM780302', 'DOSZ010101', 'DOSZ010102', 'DOSZ010103', 'DOSZ010104', 'FEND850101', 'FITW660101', 'GEOD900101', 'GIAG010101', 'GONG920101', 'GRAR740104', 'HENS920101', 'HENS920102', 'HENS920103', 'HENS920104', 'JOHM930101', 'JOND920103', 'JOND940101', 'KANM000101', 'KAPO950101', 'KESO980101', 'KESO980102', 'KOLA920101', 'KOLA930101', 'KOSJ950100_RSA_SST', 'KOSJ950100_SST', 'KOSJ950110_RSA', 'KOSJ950115', 'LEVJ860101', 'LINK010101', 'LIWA970101', 'LUTR910101', 'LUTR910102', 'LUTR910103', 'LUTR910104', 'LUTR910105', 'LUTR910106', 'LUTR910107', 'LUTR910108', 'LUTR910109', 'MCLA710101', 'MCLA720101', 'MEHP950102', 'MICC010101', 'MIRL960101', 'MIYS850102', 'MIYS850103', 'MIYS930101', 'MIYS960101', 'MIYS960102', 'MIYS960103', 'MIYS990106', 'MIYS990107', 'MIYT790101', 'MOHR870101', 'MOOG990101', 'MUET010101', 'MUET020101', 'MUET020102', 'NAOD960101', 'NGPC000101', 'NIEK910101', 'NIEK910102', 'OGAK980101', 'OVEJ920100_RSA', 'OVEJ920101', 'OVEJ920102', 'OVEJ920103', 'PRLA000101', 'PRLA000102', 'QUIB020101', 'QU_C930101', 'QU_C930102', 'QU_C930103', 'RIER950101', 'RISJ880101', 'RUSR970101', 'RUSR970102', 'RUSR970103', 'SIMK990101', 'SIMK990102', 'SIMK990103', 'SIMK990104', 'SIMK990105', 'SKOJ000101', 'SKOJ000102', 'SKOJ970101', 'TANS760101', 'TANS760102', 'THOP960101', 'TOBD000101', 'TOBD000102', 'TUDE900101', 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106']
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04263425 0.03704119 0.03798628 0.03799677 0.03774238 0.04040122
|
|
0.03711438 0.03827047 0.03824377 0.05451298]
|
|
|
|
mean value: 0.040194368362426756
|
|
|
|
key: score_time
|
|
value: [0.01482487 0.01210594 0.01444292 0.01208472 0.01211548 0.01213193
|
|
0.0142386 0.01428914 0.01436877 0.01423168]
|
|
|
|
mean value: 0.013483405113220215
|
|
|
|
key: test_mcc
|
|
value: [0.47245559 0.61922967 0.61922967 0.35810951 0.4719399 0.76742577
|
|
0.5197192 0.41033786 0.48217405 0.35714286]
|
|
|
|
mean value: 0.5077764072193169
|
|
|
|
key: train_mcc
|
|
value: [0.62617506 0.61801227 0.65806431 0.66845905 0.66057906 0.6395729
|
|
0.67881796 0.65919543 0.66956533 0.65813958]
|
|
|
|
mean value: 0.653658093185859
|
|
|
|
key: test_accuracy
|
|
value: [0.89655172 0.9137931 0.9137931 0.87931034 0.89473684 0.94736842
|
|
0.89473684 0.87719298 0.89473684 0.87719298]
|
|
|
|
mean value: 0.8989413188142771
|
|
|
|
key: train_accuracy
|
|
value: [0.92054264 0.91860465 0.92635659 0.92829457 0.92649903 0.92263056
|
|
0.9303675 0.92649903 0.92843327 0.92649903]
|
|
|
|
mean value: 0.9254726882881262
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.66666667 0.66666667 0.36363636 0.4 0.76923077
|
|
0.57142857 0.46153846 0.5 0.36363636]
|
|
|
|
mean value: 0.5162803862803863
|
|
|
|
key: train_fscore
|
|
value: [0.63716814 0.63793103 0.67241379 0.68376068 0.68333333 0.66101695
|
|
0.69491525 0.6779661 0.68907563 0.67241379]
|
|
|
|
mean value: 0.6709994714713439
|
|
|
|
key: test_precision
|
|
value: [1. 0.71428571 0.71428571 0.66666667 1. 1.
|
|
0.66666667 0.6 0.75 0.66666667]
|
|
|
|
mean value: 0.7778571428571428
|
|
|
|
key: train_precision
|
|
value: [0.87804878 0.84090909 0.88636364 0.88888889 0.85416667 0.84782609
|
|
0.89130435 0.86956522 0.87234043 0.88636364]
|
|
|
|
mean value: 0.8715776777385552
|
|
|
|
key: test_recall
|
|
value: [0.25 0.625 0.625 0.25 0.25 0.625 0.5 0.375 0.375 0.25 ]
|
|
|
|
mean value: 0.4125
|
|
|
|
key: train_recall
|
|
value: [0.5 0.51388889 0.54166667 0.55555556 0.56944444 0.54166667
|
|
0.56944444 0.55555556 0.56944444 0.54166667]
|
|
|
|
mean value: 0.5458333333333333
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.7925 0.7925 0.615 0.625 0.8125
|
|
0.72959184 0.66709184 0.67729592 0.61479592]
|
|
|
|
mean value: 0.6951275510204081
|
|
|
|
key: train_roc_auc
|
|
value: [0.74436937 0.74906156 0.7652027 0.77214715 0.77685705 0.76296816
|
|
0.77910424 0.7710362 0.77798065 0.76521536]
|
|
|
|
mean value: 0.7663942453689645
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.5 0.5 0.22222222 0.25 0.625
|
|
0.4 0.3 0.33333333 0.22222222]
|
|
|
|
mean value: 0.36027777777777775
|
|
|
|
key: train_jcc
|
|
value: [0.46753247 0.46835443 0.50649351 0.51948052 0.51898734 0.49367089
|
|
0.53246753 0.51282051 0.52564103 0.50649351]
|
|
|
|
mean value: 0.5051941729156919
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.22629714 0.89323449 1.23205638 1.07805514 1.29640412 0.98114443
|
|
1.02567697 1.04395366 1.02444291 0.86454535]
|
|
|
|
mean value: 1.0665810585021973
|
|
|
|
key: score_time
|
|
value: [0.014323 0.01455164 0.02080441 0.01586699 0.01295447 0.01273704
|
|
0.01546025 0.01522136 0.01536155 0.01578355]
|
|
|
|
mean value: 0.015306425094604493
|
|
|
|
key: test_mcc
|
|
value: [0.47245559 0.7952381 0.565 0.19843135 0.58333333 0.61824189
|
|
0.61824189 0.41033786 0.58888181 0.48217405]
|
|
|
|
mean value: 0.5332335869245143
|
|
|
|
key: train_mcc
|
|
value: [0.73779806 0.69998481 0.73779806 0.70995861 0.72971015 0.71129194
|
|
0.72876544 0.75026863 0.76654656 0.71895922]
|
|
|
|
mean value: 0.729108147983468
|
|
|
|
key: test_accuracy
|
|
value: [0.89655172 0.94827586 0.89655172 0.86206897 0.9122807 0.9122807
|
|
0.9122807 0.87719298 0.9122807 0.89473684]
|
|
|
|
mean value: 0.9024500907441017
|
|
|
|
key: train_accuracy
|
|
value: [0.94186047 0.93410853 0.94186047 0.93604651 0.94003868 0.93617021
|
|
0.94003868 0.94390716 0.94777563 0.93810445]
|
|
|
|
mean value: 0.939991078523983
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.82352941 0.625 0.2 0.54545455 0.66666667
|
|
0.66666667 0.46153846 0.61538462 0.5 ]
|
|
|
|
mean value: 0.5504240367475661
|
|
|
|
key: train_fscore
|
|
value: [0.75409836 0.72131148 0.75409836 0.73170732 0.752 0.736
|
|
0.74796748 0.7751938 0.784 0.73770492]
|
|
|
|
mean value: 0.7494081709951679
|
|
|
|
key: test_precision
|
|
value: [1. 0.77777778 0.625 0.5 1. 0.71428571
|
|
0.71428571 0.6 0.8 0.75 ]
|
|
|
|
mean value: 0.7481349206349206
|
|
|
|
key: train_precision
|
|
value: [0.92 0.88 0.92 0.88235294 0.88679245 0.86792453
|
|
0.90196078 0.87719298 0.9245283 0.9 ]
|
|
|
|
mean value: 0.8960751990965204
|
|
|
|
key: test_recall
|
|
value: [0.25 0.875 0.625 0.125 0.375 0.625 0.625 0.375 0.5 0.375]
|
|
|
|
mean value: 0.475
|
|
|
|
key: train_recall
|
|
value: [0.63888889 0.61111111 0.63888889 0.625 0.65277778 0.63888889
|
|
0.63888889 0.69444444 0.68055556 0.625 ]
|
|
|
|
mean value: 0.6444444444444444
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.9175 0.7825 0.5525 0.6875 0.79209184
|
|
0.79209184 0.66709184 0.73979592 0.67729592]
|
|
|
|
mean value: 0.7233367346938775
|
|
|
|
key: train_roc_auc
|
|
value: [0.81493994 0.7987988 0.81493994 0.80574324 0.81964732 0.81157928
|
|
0.81382647 0.83935705 0.8357834 0.80688202]
|
|
|
|
mean value: 0.8161497452508688
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.7 0.45454545 0.11111111 0.375 0.5
|
|
0.5 0.3 0.44444444 0.33333333]
|
|
|
|
mean value: 0.39684343434343433
|
|
|
|
key: train_jcc
|
|
value: [0.60526316 0.56410256 0.60526316 0.57692308 0.6025641 0.58227848
|
|
0.5974026 0.63291139 0.64473684 0.58441558]
|
|
|
|
mean value: 0.5995860956720384
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0150249 0.01069021 0.01066637 0.01032138 0.01021028 0.01014924
|
|
0.0103476 0.01076198 0.01043892 0.01140046]
|
|
|
|
mean value: 0.011001133918762207
|
|
|
|
key: score_time
|
|
value: [0.01307988 0.00943756 0.00928164 0.00906396 0.00903034 0.00902414
|
|
0.0091517 0.00940466 0.00917268 0.00976062]
|
|
|
|
mean value: 0.009640717506408691
|
|
|
|
key: test_mcc
|
|
value: [0.34689095 0.44419749 0.44887456 0.275 0.47759734 0.41079192
|
|
0.25 0.08479564 0.47759734 0.44233157]
|
|
|
|
mean value: 0.365807680678795
|
|
|
|
key: train_mcc
|
|
value: [0.53341876 0.51780522 0.52519266 0.51706977 0.5102571 0.57269947
|
|
0.48292611 0.58448434 0.50999122 0.53798959]
|
|
|
|
mean value: 0.5291834236151625
|
|
|
|
key: test_accuracy
|
|
value: [0.82758621 0.84482759 0.81034483 0.82758621 0.85964912 0.8245614
|
|
0.70175439 0.68421053 0.85964912 0.84210526]
|
|
|
|
mean value: 0.8082274652147611
|
|
|
|
key: train_accuracy
|
|
value: [0.85271318 0.85465116 0.85852713 0.87984496 0.84719536 0.8762089
|
|
0.81818182 0.8762089 0.83945841 0.85880077]
|
|
|
|
mean value: 0.8561790592715877
|
|
|
|
key: test_fscore
|
|
value: [0.44444444 0.52631579 0.52173913 0.375 0.55555556 0.5
|
|
0.37037037 0.25 0.55555556 0.52631579]
|
|
|
|
mean value: 0.4625296635308077
|
|
|
|
key: train_fscore
|
|
value: [0.59574468 0.58563536 0.59217877 0.58666667 0.57754011 0.63218391
|
|
0.54807692 0.64044944 0.57435897 0.6010929 ]
|
|
|
|
mean value: 0.593392772439433
|
|
|
|
key: test_precision
|
|
value: [0.4 0.45454545 0.4 0.375 0.5 0.41666667
|
|
0.26315789 0.1875 0.5 0.45454545]
|
|
|
|
mean value: 0.39514154704944177
|
|
|
|
key: train_precision
|
|
value: [0.48275862 0.48623853 0.4953271 0.56410256 0.46956522 0.53921569
|
|
0.41911765 0.53773585 0.45528455 0.4954955 ]
|
|
|
|
mean value: 0.4944841267828315
|
|
|
|
key: test_recall
|
|
value: [0.5 0.625 0.75 0.375 0.625 0.625 0.625 0.375 0.625 0.625]
|
|
|
|
mean value: 0.575
|
|
|
|
key: train_recall
|
|
value: [0.77777778 0.73611111 0.73611111 0.61111111 0.75 0.76388889
|
|
0.79166667 0.79166667 0.77777778 0.76388889]
|
|
|
|
mean value: 0.75
|
|
|
|
key: test_roc_auc
|
|
value: [0.69 0.7525 0.785 0.6375 0.76147959 0.74107143
|
|
0.66964286 0.55484694 0.76147959 0.75127551]
|
|
|
|
mean value: 0.7104795918367347
|
|
|
|
key: train_roc_auc
|
|
value: [0.82132132 0.80499249 0.80724474 0.76726727 0.80646067 0.82913546
|
|
0.80706929 0.84077715 0.81360799 0.8190231 ]
|
|
|
|
mean value: 0.8116899483753416
|
|
|
|
key: test_jcc
|
|
value: [0.28571429 0.35714286 0.35294118 0.23076923 0.38461538 0.33333333
|
|
0.22727273 0.14285714 0.38461538 0.35714286]
|
|
|
|
mean value: 0.3056404379933792
|
|
|
|
key: train_jcc
|
|
value: [0.42424242 0.4140625 0.42063492 0.41509434 0.40601504 0.46218487
|
|
0.37748344 0.47107438 0.4028777 0.4296875 ]
|
|
|
|
mean value: 0.4223357117759176
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01062655 0.01041269 0.01084423 0.01042485 0.01048541 0.01059198
|
|
0.01061225 0.01127267 0.01166177 0.01069999]
|
|
|
|
mean value: 0.010763239860534669
|
|
|
|
key: score_time
|
|
value: [0.00900936 0.00969958 0.00903201 0.00901031 0.00928688 0.00904131
|
|
0.00920987 0.00900078 0.00979638 0.00972509]
|
|
|
|
mean value: 0.009281158447265625
|
|
|
|
key: test_mcc
|
|
value: [-0.07559289 0.23343214 0.1924871 -0.09341987 -0.05399492 0.12755102
|
|
0.58333333 0.15658528 0.15658528 0.27295918]
|
|
|
|
mean value: 0.14999256483501192
|
|
|
|
key: train_mcc
|
|
value: [0.30966398 0.28072414 0.26572094 0.28330563 0.23769701 0.26892262
|
|
0.24847359 0.28937166 0.25064836 0.35257444]
|
|
|
|
mean value: 0.2787102363205456
|
|
|
|
key: test_accuracy
|
|
value: [0.82758621 0.84482759 0.82758621 0.81034483 0.84210526 0.78947368
|
|
0.9122807 0.80701754 0.80701754 0.8245614 ]
|
|
|
|
mean value: 0.8292800967937084
|
|
|
|
key: train_accuracy
|
|
value: [0.86046512 0.85077519 0.85465116 0.85465116 0.84526112 0.85299807
|
|
0.84912959 0.85686654 0.85299807 0.86460348]
|
|
|
|
mean value: 0.8542399502196633
|
|
|
|
key: test_fscore
|
|
value: [0. 0.30769231 0.28571429 0. 0. 0.25
|
|
0.54545455 0.26666667 0.26666667 0.375 ]
|
|
|
|
mean value: 0.22971944721944723
|
|
|
|
key: train_fscore
|
|
value: [0.36842105 0.35294118 0.32432432 0.34782609 0.31034483 0.33333333
|
|
0.31578947 0.35087719 0.30909091 0.41666667]
|
|
|
|
mean value: 0.3429615043726796
|
|
|
|
key: test_precision
|
|
value: [0. 0.4 0.33333333 0. 0. 0.25
|
|
1. 0.28571429 0.28571429 0.375 ]
|
|
|
|
mean value: 0.2929761904761905
|
|
|
|
key: train_precision
|
|
value: [0.5 0.44680851 0.46153846 0.46511628 0.40909091 0.45238095
|
|
0.42857143 0.47619048 0.44736842 0.52083333]
|
|
|
|
mean value: 0.4607898771866258
|
|
|
|
key: test_recall
|
|
value: [0. 0.25 0.25 0. 0. 0.25 0.375 0.25 0.25 0.375]
|
|
|
|
mean value: 0.2
|
|
|
|
key: train_recall
|
|
value: [0.29166667 0.29166667 0.25 0.27777778 0.25 0.26388889
|
|
0.25 0.27777778 0.23611111 0.34722222]
|
|
|
|
mean value: 0.27361111111111114
|
|
|
|
key: test_roc_auc
|
|
value: [0.48 0.595 0.585 0.47 0.48979592 0.56377551
|
|
0.6875 0.57397959 0.57397959 0.63647959]
|
|
|
|
mean value: 0.5655510204081632
|
|
|
|
key: train_roc_auc
|
|
value: [0.62218468 0.61655405 0.60135135 0.61298799 0.59578652 0.60610175
|
|
0.59803371 0.61416979 0.59446005 0.64776841]
|
|
|
|
mean value: 0.6109398302797179
|
|
|
|
key: test_jcc
|
|
value: [0. 0.18181818 0.16666667 0. 0. 0.14285714
|
|
0.375 0.15384615 0.15384615 0.23076923]
|
|
|
|
mean value: 0.140480352980353
|
|
|
|
key: train_jcc
|
|
value: [0.22580645 0.21428571 0.19354839 0.21052632 0.18367347 0.2
|
|
0.1875 0.21276596 0.1827957 0.26315789]
|
|
|
|
mean value: 0.20740598892810022
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01039195 0.01144099 0.00996161 0.00982189 0.01064062 0.01074576
|
|
0.01099849 0.01064777 0.01080966 0.0107832 ]
|
|
|
|
mean value: 0.010624194145202636
|
|
|
|
key: score_time
|
|
value: [0.05882049 0.01525664 0.01549125 0.01230073 0.01294708 0.01280308
|
|
0.01313806 0.01308799 0.01519942 0.01323152]
|
|
|
|
mean value: 0.018227624893188476
|
|
|
|
key: test_mcc
|
|
value: [-0.05298129 0. 0. 0. -0.05399492 0.33071891
|
|
0. -0.0952381 0. 0. ]
|
|
|
|
mean value: 0.012850459964677291
|
|
|
|
key: train_mcc
|
|
value: [0.30852901 0.34606844 0.32981365 0.32784226 0.30176831 0.34591937
|
|
0.28912254 0.3992732 0.29222519 0.39575277]
|
|
|
|
mean value: 0.3336314742803226
|
|
|
|
key: test_accuracy
|
|
value: [0.84482759 0.86206897 0.86206897 0.86206897 0.84210526 0.87719298
|
|
0.85964912 0.80701754 0.85964912 0.85964912]
|
|
|
|
mean value: 0.8536297640653357
|
|
|
|
key: train_accuracy
|
|
value: [0.87596899 0.87984496 0.87790698 0.87790698 0.87427466 0.88007737
|
|
0.87427466 0.88588008 0.87427466 0.88588008]
|
|
|
|
mean value: 0.8786289415680806
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0. 0.22222222
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: 0.02222222222222222
|
|
|
|
key: train_fscore
|
|
value: [0.2195122 0.26190476 0.27586207 0.25882353 0.26966292 0.27906977
|
|
0.21686747 0.30588235 0.23529412 0.33707865]
|
|
|
|
mean value: 0.2659957836347317
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.1
|
|
|
|
key: train_precision
|
|
value: [0.9 0.91666667 0.8 0.84615385 0.70588235 0.85714286
|
|
0.81818182 1. 0.76923077 0.88235294]
|
|
|
|
mean value: 0.8495611251493604
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0. 0.125 0. 0. 0. 0. ]
|
|
|
|
mean value: 0.0125
|
|
|
|
key: train_recall
|
|
value: [0.125 0.15277778 0.16666667 0.15277778 0.16666667 0.16666667
|
|
0.125 0.18055556 0.13888889 0.20833333]
|
|
|
|
mean value: 0.15833333333333333
|
|
|
|
key: test_roc_auc
|
|
value: [0.49 0.5 0.5 0.5 0.48979592 0.5625
|
|
0.5 0.46938776 0.5 0.5 ]
|
|
|
|
mean value: 0.5011683673469388
|
|
|
|
key: train_roc_auc
|
|
value: [0.56137387 0.57526276 0.57995495 0.57413664 0.57771536 0.58108614
|
|
0.56025281 0.59027778 0.56607366 0.60191948]
|
|
|
|
mean value: 0.5768053446705133
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0. 0.125 0. 0. 0. 0. ]
|
|
|
|
mean value: 0.0125
|
|
|
|
key: train_jcc
|
|
value: [0.12328767 0.15068493 0.16 0.14864865 0.15584416 0.16216216
|
|
0.12162162 0.18055556 0.13333333 0.2027027 ]
|
|
|
|
mean value: 0.1538840782607906
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02262378 0.01849151 0.0185678 0.01850915 0.01892066 0.01834273
|
|
0.01847243 0.0184772 0.01857615 0.0187006 ]
|
|
|
|
mean value: 0.01896820068359375
|
|
|
|
key: score_time
|
|
value: [0.0128479 0.01111269 0.01132369 0.01111841 0.01130962 0.01112962
|
|
0.01148701 0.01093578 0.01147842 0.01162195]
|
|
|
|
mean value: 0.01143651008605957
|
|
|
|
key: test_mcc
|
|
value: [ 0. -0.05298129 0. 0. 0. 0.
|
|
0. 0.33071891 0. 0. ]
|
|
|
|
mean value: 0.027773761960047202
|
|
|
|
key: train_mcc
|
|
value: [0.26934925 0.26934925 0.31162936 0.31162936 0.29125788 0.29125788
|
|
0.18992968 0.33090487 0.24567659 0.31167349]
|
|
|
|
mean value: 0.282265762191243
|
|
|
|
key: test_accuracy
|
|
value: [0.86206897 0.84482759 0.86206897 0.86206897 0.85964912 0.85964912
|
|
0.85964912 0.87719298 0.85964912 0.85964912]
|
|
|
|
mean value: 0.8606473079249849
|
|
|
|
key: train_accuracy
|
|
value: [0.87209302 0.87209302 0.87596899 0.87596899 0.87427466 0.87427466
|
|
0.86653772 0.87814313 0.87040619 0.8762089 ]
|
|
|
|
mean value: 0.8735969292129608
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0. 0.
|
|
0. 0.22222222 0. 0. ]
|
|
|
|
mean value: 0.02222222222222222
|
|
|
|
key: train_fscore
|
|
value: [0.15384615 0.15384615 0.2 0.2 0.17721519 0.17721519
|
|
0.08 0.22222222 0.12987013 0.2 ]
|
|
|
|
mean value: 0.16942150395314953
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
|
|
|
|
mean value: 0.1
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0.125 0. 0. ]
|
|
|
|
mean value: 0.0125
|
|
|
|
key: train_recall
|
|
value: [0.08333333 0.08333333 0.11111111 0.11111111 0.09722222 0.09722222
|
|
0.04166667 0.125 0.06944444 0.11111111]
|
|
|
|
mean value: 0.09305555555555556
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.49 0.5 0.5 0.5 0.5 0.5 0.5625 0.5 0.5 ]
|
|
|
|
mean value: 0.50525
|
|
|
|
key: train_roc_auc
|
|
value: [0.54166667 0.54166667 0.55555556 0.55555556 0.54861111 0.54861111
|
|
0.52083333 0.5625 0.53472222 0.55555556]
|
|
|
|
mean value: 0.5465277777777778
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0.125 0. 0. ]
|
|
|
|
mean value: 0.0125
|
|
|
|
key: train_jcc
|
|
value: [0.08333333 0.08333333 0.11111111 0.11111111 0.09722222 0.09722222
|
|
0.04166667 0.125 0.06944444 0.11111111]
|
|
|
|
mean value: 0.09305555555555556
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.85801458 1.84940219 1.97478437 2.09506321 2.11832547 2.09276605
|
|
2.04526186 2.10188341 2.16838956 2.7913096 ]
|
|
|
|
mean value: 2.1095200300216677
|
|
|
|
key: score_time
|
|
value: [0.01269364 0.01262355 0.01577377 0.01264954 0.01251841 0.01581526
|
|
0.01749182 0.02402568 0.01288605 0.02757978]
|
|
|
|
mean value: 0.016405749320983886
|
|
|
|
key: test_mcc
|
|
value: [0.35810951 0.71 0.44419749 0.35666727 0.58333333 0.68429694
|
|
0.5197192 0.27295918 0.48217405 0.31047082]
|
|
|
|
mean value: 0.47219277979682606
|
|
|
|
key: train_mcc
|
|
value: [0.96746666 0.93440752 0.97567874 0.97595976 0.97566179 0.96747666
|
|
0.99191739 0.96747666 0.95159176 0.9756863 ]
|
|
|
|
mean value: 0.9683323225073224
|
|
|
|
key: test_accuracy
|
|
value: [0.87931034 0.93103448 0.84482759 0.86206897 0.9122807 0.92982456
|
|
0.89473684 0.8245614 0.89473684 0.84210526]
|
|
|
|
mean value: 0.8815486993345433
|
|
|
|
key: train_accuracy
|
|
value: [0.99224806 0.98449612 0.99418605 0.99418605 0.99419729 0.99226306
|
|
0.99806576 0.99226306 0.98839458 0.99419729]
|
|
|
|
mean value: 0.9924497323557195
|
|
|
|
key: test_fscore
|
|
value: [0.36363636 0.75 0.52631579 0.42857143 0.54545455 0.71428571
|
|
0.57142857 0.375 0.5 0.4 ]
|
|
|
|
mean value: 0.5174692412850308
|
|
|
|
key: train_fscore
|
|
value: [0.97183099 0.94285714 0.97902098 0.97931034 0.9787234 0.97183099
|
|
0.99300699 0.97183099 0.95833333 0.97902098]
|
|
|
|
mean value: 0.9725766134068812
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.75 0.45454545 0.5 1. 0.83333333
|
|
0.66666667 0.375 0.75 0.42857143]
|
|
|
|
mean value: 0.6424783549783549
|
|
|
|
key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
train_precision
|
|
value: [0.98571429 0.97058824 0.98591549 0.97260274 1. 0.98571429
|
|
1. 0.98571429 0.95833333 0.98591549]
|
|
|
|
mean value: 0.9830498151411828
|
|
|
|
key: test_recall
|
|
value: [0.25 0.75 0.625 0.375 0.375 0.625 0.5 0.375 0.375 0.375]
|
|
|
|
mean value: 0.4625
|
|
|
|
key: train_recall
|
|
value: [0.95833333 0.91666667 0.97222222 0.98611111 0.95833333 0.95833333
|
|
0.98611111 0.95833333 0.95833333 0.97222222]
|
|
|
|
mean value: 0.9625
|
|
|
|
key: test_roc_auc
|
|
value: [0.615 0.855 0.7525 0.6575 0.6875 0.80229592
|
|
0.72959184 0.63647959 0.67729592 0.64668367]
|
|
|
|
mean value: 0.705984693877551
|
|
|
|
key: train_roc_auc
|
|
value: [0.97804054 0.95608108 0.98498498 0.9908033 0.97916667 0.97804307
|
|
0.99305556 0.97804307 0.97579588 0.98498752]
|
|
|
|
mean value: 0.9799001670209536
|
|
|
|
key: test_jcc
|
|
value: [0.22222222 0.6 0.35714286 0.27272727 0.375 0.55555556
|
|
0.4 0.23076923 0.33333333 0.25 ]
|
|
|
|
mean value: 0.35967504717504717
|
|
|
|
key: train_jcc
|
|
value: [0.94520548 0.89189189 0.95890411 0.95945946 0.95833333 0.94520548
|
|
0.98611111 0.94520548 0.92 0.95890411]
|
|
|
|
mean value: 0.9469220453330043
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05454445 0.03568411 0.03453302 0.0279057 0.04572034 0.03159571
|
|
0.03391123 0.03266001 0.02976894 0.04420066]
|
|
|
|
mean value: 0.03705241680145264
|
|
|
|
key: score_time
|
|
value: [0.01313186 0.01358676 0.01003742 0.01008749 0.00921917 0.00901103
|
|
0.01550436 0.00951743 0.01591778 0.00948715]
|
|
|
|
mean value: 0.011550045013427735
|
|
|
|
key: test_mcc
|
|
value: [0.65714286 0.61162352 0.52084744 0.52084744 0.77212742 0.50261554
|
|
0.6103927 0.57031583 0.5197192 0.56377551]
|
|
|
|
mean value: 0.5849407442692445
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9137931 0.89655172 0.89655172 0.89655172 0.94736842 0.84210526
|
|
0.89473684 0.87719298 0.89473684 0.89473684]
|
|
|
|
mean value: 0.8954325468844525
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.66666667 0.57142857 0.57142857 0.8 0.57142857
|
|
0.66666667 0.63157895 0.57142857 0.625 ]
|
|
|
|
mean value: 0.6381508919357216
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.6 0.66666667 0.66666667 0.85714286 0.46153846
|
|
0.6 0.54545455 0.66666667 0.625 ]
|
|
|
|
mean value: 0.635580253080253
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.5 0.5 0.75 0.75 0.75 0.75 0.5 0.625]
|
|
|
|
mean value: 0.6625
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.845 0.835 0.73 0.73 0.86479592 0.80357143
|
|
0.83418367 0.82397959 0.72959184 0.78188776]
|
|
|
|
mean value: 0.7978010204081633
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.5 0.4 0.4 0.66666667 0.4
|
|
0.5 0.46153846 0.4 0.45454545]
|
|
|
|
mean value: 0.47282051282051285
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.1327951 0.12327695 0.13055015 0.14695501 0.1336956 0.12559557
|
|
0.12217355 0.12586927 0.12189078 0.12276053]
|
|
|
|
mean value: 0.1285562515258789
|
|
|
|
key: score_time
|
|
value: [0.01817608 0.01838422 0.02003121 0.02497005 0.01951098 0.01909184
|
|
0.01816821 0.02352285 0.01824951 0.01839209]
|
|
|
|
mean value: 0.019849705696105956
|
|
|
|
key: test_mcc
|
|
value: [0. 0.19843135 0.2857738 0. 0.33071891 0.4719399
|
|
0.35714286 0.4719399 0.35714286 0.33071891]
|
|
|
|
mean value: 0.2803808501154949
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.86206897 0.86206897 0.86206897 0.86206897 0.87719298 0.89473684
|
|
0.87719298 0.89473684 0.87719298 0.87719298]
|
|
|
|
mean value: 0.8746521476104053
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0. 0.2 0.33333333 0. 0.22222222 0.4
|
|
0.36363636 0.4 0.36363636 0.22222222]
|
|
|
|
mean value: 0.2505050505050505
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0. 0.5 0.5 0. 1. 1.
|
|
0.66666667 1. 0.66666667 1. ]
|
|
|
|
mean value: 0.6333333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0.125 0.25 0. 0.125 0.25 0.25 0.25 0.25 0.125]
|
|
|
|
mean value: 0.1625
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5525 0.605 0.5 0.5625 0.625
|
|
0.61479592 0.625 0.61479592 0.5625 ]
|
|
|
|
mean value: 0.5762091836734694
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0. 0.11111111 0.2 0. 0.125 0.25
|
|
0.22222222 0.25 0.22222222 0.125 ]
|
|
|
|
mean value: 0.15055555555555555
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01060414 0.0103755 0.01072049 0.01170206 0.011096 0.01059413
|
|
0.01050496 0.01154757 0.01851821 0.01054764]
|
|
|
|
mean value: 0.01162106990814209
|
|
|
|
key: score_time
|
|
value: [0.00884962 0.00893354 0.00923753 0.00905967 0.00929594 0.00918555
|
|
0.00900674 0.01196909 0.01183558 0.00871658]
|
|
|
|
mean value: 0.009608983993530273
|
|
|
|
key: test_mcc
|
|
value: [ 0.23343214 0.24285714 0.04256283 0.02830693 0.00269975 0.15658528
|
|
-0.03645074 -0.0952381 0.15658528 0.27295918]
|
|
|
|
mean value: 0.10042996974472658
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.84482759 0.81034483 0.72413793 0.79310345 0.77192982 0.80701754
|
|
0.73684211 0.80701754 0.80701754 0.8245614 ]
|
|
|
|
mean value: 0.7926799758015729
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.30769231 0.35294118 0.2 0.14285714 0.13333333 0.26666667
|
|
0.11764706 0. 0.26666667 0.375 ]
|
|
|
|
mean value: 0.2162804352510235
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.4 0.33333333 0.16666667 0.16666667 0.14285714 0.28571429
|
|
0.11111111 0. 0.28571429 0.375 ]
|
|
|
|
mean value: 0.2267063492063492
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.25 0.375 0.25 0.125 0.125 0.25 0.125 0. 0.25 0.375]
|
|
|
|
mean value: 0.2125
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.595 0.6275 0.525 0.5125 0.50127551 0.57397959
|
|
0.48086735 0.46938776 0.57397959 0.63647959]
|
|
|
|
mean value: 0.5495969387755102
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.18181818 0.21428571 0.11111111 0.07692308 0.07142857 0.15384615
|
|
0.0625 0. 0.15384615 0.23076923]
|
|
|
|
mean value: 0.12565281940281942
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.04362011 2.03106213 2.0903945 1.98811364 1.95567226 2.06184196
|
|
1.95781398 1.95208192 1.97616935 1.99692726]
|
|
|
|
mean value: 2.005369710922241
|
|
|
|
key: score_time
|
|
value: [0.09700465 0.09313798 0.1012938 0.10016513 0.09421134 0.10920119
|
|
0.09990931 0.09628034 0.10220432 0.09857464]
|
|
|
|
mean value: 0.09919826984405518
|
|
|
|
key: test_mcc
|
|
value: [0.33113309 0.58387421 0.41157773 0.47245559 0.4719399 0.4719399
|
|
0.41033786 0.58333333 0.35714286 0.33071891]
|
|
|
|
mean value: 0.44244533877684195
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.87931034 0.9137931 0.87931034 0.89655172 0.89473684 0.89473684
|
|
0.87719298 0.9122807 0.87719298 0.87719298]
|
|
|
|
mean value: 0.8902298850574712
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.22222222 0.54545455 0.46153846 0.4 0.4 0.4
|
|
0.46153846 0.54545455 0.36363636 0.22222222]
|
|
|
|
mean value: 0.4022066822066822
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.6 1. 1. 1.
|
|
0.6 1. 0.66666667 1. ]
|
|
|
|
mean value: 0.8866666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.125 0.375 0.375 0.25 0.25 0.25 0.375 0.375 0.25 0.125]
|
|
|
|
mean value: 0.275
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5625 0.6875 0.6675 0.625 0.625 0.625
|
|
0.66709184 0.6875 0.61479592 0.5625 ]
|
|
|
|
mean value: 0.6324387755102041
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.125 0.375 0.3 0.25 0.25 0.25
|
|
0.3 0.375 0.22222222 0.125 ]
|
|
|
|
mean value: 0.25722222222222224
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
key: fit_time
|
|
value: [1.90253186 0.92658377 1.01286578 0.99707365 0.99414515 0.96656704
|
|
0.9927001 0.96131229 0.95976067 0.98991156]
|
|
|
|
mean value: 1.0703451871871947
|
|
|
|
key: score_time
|
|
value: [0.26065946 0.2848711 0.26131558 0.25837517 0.22701669 0.19715333
|
|
0.28803372 0.28881145 0.14004779 0.27792072]
|
|
|
|
mean value: 0.24842050075531005
|
|
|
|
key: test_mcc
|
|
value: [0. 0.33113309 0.47245559 0. 0. 0.4719399
|
|
0.48217405 0.4719399 0.19744425 0. ]
|
|
|
|
mean value: 0.24270867865107823
|
|
|
|
key: train_mcc
|
|
value: [0.75819441 0.76667978 0.76667978 0.71874388 0.74878789 0.75730806
|
|
0.74878789 0.73822387 0.73822387 0.70895404]
|
|
|
|
mean value: 0.745058349020781
|
|
|
|
key: test_accuracy
|
|
value: [0.86206897 0.87931034 0.89655172 0.86206897 0.85964912 0.89473684
|
|
0.89473684 0.89473684 0.85964912 0.85964912]
|
|
|
|
mean value: 0.8763157894736842
|
|
|
|
key: train_accuracy
|
|
value: [0.94573643 0.94767442 0.94767442 0.9379845 0.94390716 0.94584139
|
|
0.94390716 0.94197292 0.94197292 0.93617021]
|
|
|
|
mean value: 0.9432841527596599
|
|
|
|
key: test_fscore
|
|
value: [0. 0.22222222 0.4 0. 0. 0.4
|
|
0.5 0.4 0.2 0. ]
|
|
|
|
mean value: 0.21222222222222223
|
|
|
|
key: train_fscore
|
|
value: [0.75862069 0.77310924 0.77310924 0.71929825 0.74782609 0.76271186
|
|
0.74782609 0.74137931 0.74137931 0.7079646 ]
|
|
|
|
mean value: 0.7473224683443556
|
|
|
|
key: test_precision
|
|
value: [0. 1. 1. 0. 0. 1. 0.75 1. 0.5 0. ]
|
|
|
|
mean value: 0.525
|
|
|
|
key: train_precision
|
|
value: [1. 0.9787234 0.9787234 0.97619048 1. 0.97826087
|
|
1. 0.97727273 0.97727273 0.97560976]
|
|
|
|
mean value: 0.9842053364909348
|
|
|
|
key: test_recall
|
|
value: [0. 0.125 0.25 0. 0. 0.25 0.375 0.25 0.125 0. ]
|
|
|
|
mean value: 0.1375
|
|
|
|
key: train_recall
|
|
value: [0.61111111 0.63888889 0.63888889 0.56944444 0.59722222 0.625
|
|
0.59722222 0.59722222 0.59722222 0.55555556]
|
|
|
|
mean value: 0.6027777777777777
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5625 0.625 0.5 0.5 0.625
|
|
0.67729592 0.625 0.55229592 0.5 ]
|
|
|
|
mean value: 0.5667091836734693
|
|
|
|
key: train_roc_auc
|
|
value: [0.80555556 0.81831832 0.81831832 0.7835961 0.79861111 0.8113764
|
|
0.79861111 0.79748752 0.79748752 0.77665418]
|
|
|
|
mean value: 0.8006016128488039
|
|
|
|
key: test_jcc
|
|
value: [0. 0.125 0.25 0. 0. 0.25
|
|
0.33333333 0.25 0.11111111 0. ]
|
|
|
|
mean value: 0.13194444444444445
|
|
|
|
key: train_jcc
|
|
value: [0.61111111 0.63013699 0.63013699 0.56164384 0.59722222 0.61643836
|
|
0.59722222 0.5890411 0.5890411 0.54794521]
|
|
|
|
mean value: 0.5969939117199391
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0253582 0.01072717 0.01081467 0.01073623 0.01069832 0.01075745
|
|
0.01184607 0.01128554 0.01154971 0.01516724]
|
|
|
|
mean value: 0.012894058227539062
|
|
|
|
key: score_time
|
|
value: [0.01045537 0.00921535 0.00922275 0.00922704 0.00899363 0.00909328
|
|
0.0093565 0.00942206 0.01003766 0.00900197]
|
|
|
|
mean value: 0.00940256118774414
|
|
|
|
key: test_mcc
|
|
value: [-0.07559289 0.23343214 0.1924871 -0.09341987 -0.05399492 0.12755102
|
|
0.58333333 0.15658528 0.15658528 0.27295918]
|
|
|
|
mean value: 0.14999256483501192
|
|
|
|
key: train_mcc
|
|
value: [0.30966398 0.28072414 0.26572094 0.28330563 0.23769701 0.26892262
|
|
0.24847359 0.28937166 0.25064836 0.35257444]
|
|
|
|
mean value: 0.2787102363205456
|
|
|
|
key: test_accuracy
|
|
value: [0.82758621 0.84482759 0.82758621 0.81034483 0.84210526 0.78947368
|
|
0.9122807 0.80701754 0.80701754 0.8245614 ]
|
|
|
|
mean value: 0.8292800967937084
|
|
|
|
key: train_accuracy
|
|
value: [0.86046512 0.85077519 0.85465116 0.85465116 0.84526112 0.85299807
|
|
0.84912959 0.85686654 0.85299807 0.86460348]
|
|
|
|
mean value: 0.8542399502196633
|
|
|
|
key: test_fscore
|
|
value: [0. 0.30769231 0.28571429 0. 0. 0.25
|
|
0.54545455 0.26666667 0.26666667 0.375 ]
|
|
|
|
mean value: 0.22971944721944723
|
|
|
|
key: train_fscore
|
|
value: [0.36842105 0.35294118 0.32432432 0.34782609 0.31034483 0.33333333
|
|
0.31578947 0.35087719 0.30909091 0.41666667]
|
|
|
|
mean value: 0.3429615043726796
|
|
|
|
key: test_precision
|
|
value: [0. 0.4 0.33333333 0. 0. 0.25
|
|
1. 0.28571429 0.28571429 0.375 ]
|
|
|
|
mean value: 0.2929761904761905
|
|
|
|
key: train_precision
|
|
value: [0.5 0.44680851 0.46153846 0.46511628 0.40909091 0.45238095
|
|
0.42857143 0.47619048 0.44736842 0.52083333]
|
|
|
|
mean value: 0.4607898771866258
|
|
|
|
key: test_recall
|
|
value: [0. 0.25 0.25 0. 0. 0.25 0.375 0.25 0.25 0.375]
|
|
|
|
mean value: 0.2
|
|
|
|
key: train_recall
|
|
value: [0.29166667 0.29166667 0.25 0.27777778 0.25 0.26388889
|
|
0.25 0.27777778 0.23611111 0.34722222]
|
|
|
|
mean value: 0.27361111111111114
|
|
|
|
key: test_roc_auc
|
|
value: [0.48 0.595 0.585 0.47 0.48979592 0.56377551
|
|
0.6875 0.57397959 0.57397959 0.63647959]
|
|
|
|
mean value: 0.5655510204081632
|
|
|
|
key: train_roc_auc
|
|
value: [0.62218468 0.61655405 0.60135135 0.61298799 0.59578652 0.60610175
|
|
0.59803371 0.61416979 0.59446005 0.64776841]
|
|
|
|
mean value: 0.6109398302797179
|
|
|
|
key: test_jcc
|
|
value: [0. 0.18181818 0.16666667 0. 0. 0.14285714
|
|
0.375 0.15384615 0.15384615 0.23076923]
|
|
|
|
mean value: 0.140480352980353
|
|
|
|
key: train_jcc
|
|
value: [0.22580645 0.21428571 0.19354839 0.21052632 0.18367347 0.2
|
|
0.1875 0.21276596 0.1827957 0.26315789]
|
|
|
|
mean value: 0.20740598892810022
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.13004971 0.08632278 0.08112955 0.08578324 0.09619546 0.07739091
|
|
0.08239293 0.20527792 0.07903028 0.08405089]
|
|
|
|
mean value: 0.10076236724853516
|
|
|
|
key: score_time
|
|
value: [0.01123238 0.01125193 0.01100922 0.0110631 0.01110554 0.01116109
|
|
0.01117873 0.01108193 0.01099992 0.01134038]
|
|
|
|
mean value: 0.011142420768737792
|
|
|
|
key: test_mcc
|
|
value: [0.61922967 0.7952381 0.41157773 0.92619822 0.6799001 0.6103927
|
|
0.61824189 0.68429694 0.5197192 0.70918367]
|
|
|
|
mean value: 0.6573978206284674
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9137931 0.94827586 0.87931034 0.98275862 0.92982456 0.89473684
|
|
0.9122807 0.92982456 0.89473684 0.92982456]
|
|
|
|
mean value: 0.9215366001209921
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.82352941 0.46153846 0.93333333 0.66666667 0.66666667
|
|
0.66666667 0.71428571 0.57142857 0.75 ]
|
|
|
|
mean value: 0.6920782159017453
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.77777778 0.6 1. 1. 0.6
|
|
0.71428571 0.83333333 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7656349206349207
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.625 0.875 0.375 0.875 0.5 0.75 0.625 0.625 0.5 0.75 ]
|
|
|
|
mean value: 0.65
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.7925 0.9175 0.6675 0.9375 0.75 0.83418367
|
|
0.79209184 0.80229592 0.72959184 0.85459184]
|
|
|
|
mean value: 0.8077755102040816
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.7 0.3 0.875 0.5 0.5
|
|
0.5 0.55555556 0.4 0.6 ]
|
|
|
|
mean value: 0.5430555555555555
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.74
|
|
|
|
Accuracy on Blind test: 0.94
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.05385113 0.07887149 0.06488419 0.07870007 0.04349208 0.09474182
|
|
0.0630188 0.07406092 0.04248405 0.08584023]
|
|
|
|
mean value: 0.06799447536468506
|
|
|
|
key: score_time
|
|
value: [0.02132988 0.01284051 0.0187552 0.01454592 0.01265979 0.03040051
|
|
0.01888776 0.01252842 0.01235175 0.01234198]
|
|
|
|
mean value: 0.01666417121887207
|
|
|
|
key: test_mcc
|
|
value: [0.23343214 0.87635609 0.57173935 0.1924871 0.46435635 0.3790877
|
|
0.51760051 0.14149261 0.56377551 0.3790877 ]
|
|
|
|
mean value: 0.43194150579814455
|
|
|
|
key: train_mcc
|
|
value: [0.81132564 0.759597 0.78356182 0.82955348 0.76481594 0.80036717
|
|
0.79293741 0.80795441 0.82620572 0.80218914]
|
|
|
|
mean value: 0.797850773474279
|
|
|
|
key: test_accuracy
|
|
value: [0.84482759 0.96551724 0.87931034 0.82758621 0.87719298 0.84210526
|
|
0.87719298 0.73684211 0.89473684 0.84210526]
|
|
|
|
mean value: 0.8587416817906836
|
|
|
|
key: train_accuracy
|
|
value: [0.95542636 0.94379845 0.9496124 0.95930233 0.94584139 0.95357834
|
|
0.9516441 0.95551257 0.95938104 0.95357834]
|
|
|
|
mean value: 0.9527675318249291
|
|
|
|
key: test_fscore
|
|
value: [0.30769231 0.88888889 0.63157895 0.28571429 0.53333333 0.47058824
|
|
0.58823529 0.28571429 0.625 0.47058824]
|
|
|
|
mean value: 0.5087333813417405
|
|
|
|
key: train_fscore
|
|
value: [0.83687943 0.79136691 0.8115942 0.85314685 0.79411765 0.82608696
|
|
0.82014388 0.83211679 0.84892086 0.82857143]
|
|
|
|
mean value: 0.8242944963818936
|
|
|
|
key: test_precision
|
|
value: [0.4 0.8 0.54545455 0.33333333 0.57142857 0.44444444
|
|
0.55555556 0.23076923 0.625 0.44444444]
|
|
|
|
mean value: 0.49504301254301253
|
|
|
|
key: train_precision
|
|
value: [0.85507246 0.82089552 0.84848485 0.85915493 0.84375 0.86363636
|
|
0.85074627 0.87692308 0.88059701 0.85294118]
|
|
|
|
mean value: 0.8552201664830608
|
|
|
|
key: test_recall
|
|
value: [0.25 1. 0.75 0.25 0.5 0.5 0.625 0.375 0.625 0.5 ]
|
|
|
|
mean value: 0.5375
|
|
|
|
key: train_recall
|
|
value: [0.81944444 0.76388889 0.77777778 0.84722222 0.75 0.79166667
|
|
0.79166667 0.79166667 0.81944444 0.80555556]
|
|
|
|
mean value: 0.7958333333333333
|
|
|
|
key: test_roc_auc
|
|
value: [0.595 0.98 0.825 0.585 0.71938776 0.69897959
|
|
0.77168367 0.58545918 0.78188776 0.69897959]
|
|
|
|
mean value: 0.7241377551020408
|
|
|
|
key: train_roc_auc
|
|
value: [0.89846096 0.86843093 0.87762763 0.91234985 0.86376404 0.88572097
|
|
0.88459738 0.88684457 0.90073346 0.89154182]
|
|
|
|
mean value: 0.887007161656038
|
|
|
|
key: test_jcc
|
|
value: [0.18181818 0.8 0.46153846 0.16666667 0.36363636 0.30769231
|
|
0.41666667 0.16666667 0.45454545 0.30769231]
|
|
|
|
mean value: 0.3626923076923077
|
|
|
|
key: train_jcc
|
|
value: [0.7195122 0.6547619 0.68292683 0.74390244 0.65853659 0.7037037
|
|
0.69512195 0.7125 0.7375 0.70731707]
|
|
|
|
mean value: 0.701578268163634
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01512098 0.01111913 0.01214242 0.01210713 0.01120567 0.01140928
|
|
0.01125884 0.0109942 0.01123261 0.01087618]
|
|
|
|
mean value: 0.011746644973754883
|
|
|
|
key: score_time
|
|
value: [0.01422715 0.01133323 0.00984669 0.0098834 0.0095768 0.00970602
|
|
0.00978208 0.00970817 0.0096364 0.0097878 ]
|
|
|
|
mean value: 0.010348773002624512
|
|
|
|
key: test_mcc
|
|
value: [ 0.19843135 0.35810951 0.48309381 0.13234482 -0.05399492 0.4719399
|
|
0.35514145 0.35714286 0.35714286 0.19744425]
|
|
|
|
mean value: 0.28567958832099444
|
|
|
|
key: train_mcc
|
|
value: [0.50590291 0.41170635 0.42576849 0.43599081 0.47928789 0.44346732
|
|
0.44332419 0.48614104 0.46632144 0.44754663]
|
|
|
|
mean value: 0.4545457072831678
|
|
|
|
key: test_accuracy
|
|
value: [0.86206897 0.87931034 0.89655172 0.84482759 0.84210526 0.89473684
|
|
0.85964912 0.87719298 0.87719298 0.85964912]
|
|
|
|
mean value: 0.8693284936479129
|
|
|
|
key: train_accuracy
|
|
value: [0.89728682 0.88565891 0.8875969 0.88953488 0.89555126 0.88974855
|
|
0.88781431 0.89555126 0.89361702 0.88974855]
|
|
|
|
mean value: 0.8912108467155474
|
|
|
|
key: test_fscore
|
|
value: [0.2 0.36363636 0.5 0.18181818 0. 0.4
|
|
0.42857143 0.36363636 0.36363636 0.2 ]
|
|
|
|
mean value: 0.30012987012987014
|
|
|
|
key: train_fscore
|
|
value: [0.53913043 0.41584158 0.43137255 0.43564356 0.49056604 0.45714286
|
|
0.47272727 0.50909091 0.47619048 0.46728972]
|
|
|
|
mean value: 0.4694995404830601
|
|
|
|
key: test_precision
|
|
value: [0.5 0.66666667 0.75 0.33333333 0. 1.
|
|
0.5 0.66666667 0.66666667 0.5 ]
|
|
|
|
mean value: 0.5583333333333333
|
|
|
|
key: train_precision
|
|
value: [0.72093023 0.72413793 0.73333333 0.75862069 0.76470588 0.72727273
|
|
0.68421053 0.73684211 0.75757576 0.71428571]
|
|
|
|
mean value: 0.7321914899647215
|
|
|
|
key: test_recall
|
|
value: [0.125 0.25 0.375 0.125 0. 0.25 0.375 0.25 0.25 0.125]
|
|
|
|
mean value: 0.2125
|
|
|
|
key: train_recall
|
|
value: [0.43055556 0.29166667 0.30555556 0.30555556 0.36111111 0.33333333
|
|
0.36111111 0.38888889 0.34722222 0.34722222]
|
|
|
|
mean value: 0.3472222222222222
|
|
|
|
key: test_roc_auc
|
|
value: [0.5525 0.615 0.6775 0.5425 0.48979592 0.625
|
|
0.65688776 0.61479592 0.61479592 0.55229592]
|
|
|
|
mean value: 0.5941071428571428
|
|
|
|
key: train_roc_auc
|
|
value: [0.70176426 0.63682432 0.64376877 0.64489489 0.67156679 0.65655431
|
|
0.66707241 0.68320849 0.66462235 0.66237516]
|
|
|
|
mean value: 0.6632651752876472
|
|
|
|
key: test_jcc
|
|
value: [0.11111111 0.22222222 0.33333333 0.1 0. 0.25
|
|
0.27272727 0.22222222 0.22222222 0.11111111]
|
|
|
|
mean value: 0.1844949494949495
|
|
|
|
key: train_jcc
|
|
value: [0.36904762 0.2625 0.275 0.27848101 0.325 0.2962963
|
|
0.30952381 0.34146341 0.3125 0.30487805]
|
|
|
|
mean value: 0.3074690200940587
|
|
|
|
MCC on Blind test: 0.46
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01609254 0.02345705 0.01742911 0.02176404 0.02223349 0.02119112
|
|
0.02198172 0.01799989 0.01906919 0.02118444]
|
|
|
|
mean value: 0.020240259170532227
|
|
|
|
key: score_time
|
|
value: [0.01867962 0.01163387 0.01198292 0.01211667 0.01226664 0.01243877
|
|
0.01214099 0.01208258 0.01212811 0.01211047]
|
|
|
|
mean value: 0.01275806427001953
|
|
|
|
key: test_mcc
|
|
value: [0.31226111 0.35810951 0.48309381 0. 0.6103927 0.76742577
|
|
0.41033786 0.27295918 0.46435635 0.5197192 ]
|
|
|
|
mean value: 0.41986554934605175
|
|
|
|
key: train_mcc
|
|
value: [0.69415955 0.57014305 0.58169758 0.47211393 0.6407403 0.66760372
|
|
0.60434847 0.66992085 0.72460535 0.72854403]
|
|
|
|
mean value: 0.6353876843571422
|
|
|
|
key: test_accuracy
|
|
value: [0.84482759 0.87931034 0.89655172 0.86206897 0.89473684 0.94736842
|
|
0.87719298 0.8245614 0.87719298 0.89473684]
|
|
|
|
mean value: 0.8798548094373866
|
|
|
|
key: train_accuracy
|
|
value: [0.91666667 0.91085271 0.9127907 0.89534884 0.88394584 0.92843327
|
|
0.91682785 0.9245648 0.92649903 0.93810445]
|
|
|
|
mean value: 0.915403415650818
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.36363636 0.5 0. 0.66666667 0.76923077
|
|
0.46153846 0.375 0.53333333 0.57142857]
|
|
|
|
mean value: 0.46408341658341656
|
|
|
|
key: train_fscore
|
|
value: [0.73619632 0.54 0.55445545 0.4 0.68085106 0.67256637
|
|
0.58252427 0.71111111 0.7625 0.76119403]
|
|
|
|
mean value: 0.640139861288068
|
|
|
|
key: test_precision
|
|
value: [0.42857143 0.66666667 0.75 0. 0.6 1.
|
|
0.6 0.375 0.57142857 0.66666667]
|
|
|
|
mean value: 0.5658333333333333
|
|
|
|
key: train_precision
|
|
value: [0.65934066 0.96428571 0.96551724 1. 0.55172414 0.92682927
|
|
0.96774194 0.76190476 0.69318182 0.82258065]
|
|
|
|
mean value: 0.8313106181961143
|
|
|
|
key: test_recall
|
|
value: [0.375 0.25 0.375 0. 0.75 0.625 0.375 0.375 0.5 0.5 ]
|
|
|
|
mean value: 0.4125
|
|
|
|
key: train_recall
|
|
value: [0.83333333 0.375 0.38888889 0.25 0.88888889 0.52777778
|
|
0.41666667 0.66666667 0.84722222 0.70833333]
|
|
|
|
mean value: 0.5902777777777778
|
|
|
|
key: test_roc_auc
|
|
value: [0.6475 0.615 0.6775 0.5 0.83418367 0.8125
|
|
0.66709184 0.63647959 0.71938776 0.72959184]
|
|
|
|
mean value: 0.6839234693877551
|
|
|
|
key: train_roc_auc
|
|
value: [0.88175676 0.68637387 0.69331832 0.625 0.88601748 0.7605181
|
|
0.70720974 0.8164794 0.89327403 0.84180712]
|
|
|
|
mean value: 0.7791754816614367
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.22222222 0.33333333 0. 0.5 0.625
|
|
0.3 0.23076923 0.36363636 0.4 ]
|
|
|
|
mean value: 0.32249611499611497
|
|
|
|
key: train_jcc
|
|
value: [0.58252427 0.36986301 0.38356164 0.25 0.51612903 0.50666667
|
|
0.4109589 0.55172414 0.61616162 0.61445783]
|
|
|
|
mean value: 0.48020471178311785
|
|
|
|
MCC on Blind test: 0.54
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02401924 0.02309346 0.01876807 0.01801634 0.02470398 0.02270722
|
|
0.02318597 0.02296734 0.02565241 0.01829553]
|
|
|
|
mean value: 0.02214095592498779
|
|
|
|
key: score_time
|
|
value: [0.01215386 0.01210546 0.0120554 0.01208019 0.0121057 0.01218271
|
|
0.01243639 0.01226568 0.01213861 0.01210523]
|
|
|
|
mean value: 0.012162923812866211
|
|
|
|
key: test_mcc
|
|
value: [0.31665564 0.74398981 0.61922967 0.48309381 0.58333333 0.68429694
|
|
0.53440971 0.58333333 0.5197192 0.23208716]
|
|
|
|
mean value: 0.5300148595392379
|
|
|
|
key: train_mcc
|
|
value: [0.71335715 0.762027 0.62623634 0.7525249 0.76929815 0.73942494
|
|
0.60100428 0.6880944 0.78805132 0.54974524]
|
|
|
|
mean value: 0.6989763735222967
|
|
|
|
key: test_accuracy
|
|
value: [0.81034483 0.93103448 0.9137931 0.89655172 0.9122807 0.92982456
|
|
0.8245614 0.9122807 0.89473684 0.68421053]
|
|
|
|
mean value: 0.870961887477314
|
|
|
|
key: train_accuracy
|
|
value: [0.9127907 0.94379845 0.92054264 0.94186047 0.94777563 0.94197292
|
|
0.84332689 0.93230174 0.9516441 0.82205029]
|
|
|
|
mean value: 0.9158063814793157
|
|
|
|
key: test_fscore
|
|
value: [0.42105263 0.77777778 0.66666667 0.5 0.54545455 0.71428571
|
|
0.58333333 0.54545455 0.57142857 0.35714286]
|
|
|
|
mean value: 0.5682596643122959
|
|
|
|
key: train_fscore
|
|
value: [0.74576271 0.79432624 0.60952381 0.78571429 0.79389313 0.76190476
|
|
0.63013699 0.69565217 0.81203008 0.58928571]
|
|
|
|
mean value: 0.7218229889601105
|
|
|
|
key: test_precision
|
|
value: [0.36363636 0.7 0.71428571 0.75 1. 0.83333333
|
|
0.4375 1. 0.66666667 0.25 ]
|
|
|
|
mean value: 0.6715422077922077
|
|
|
|
key: train_precision
|
|
value: [0.62857143 0.8115942 0.96969697 0.80882353 0.88135593 0.88888889
|
|
0.46938776 0.93023256 0.8852459 0.43421053]
|
|
|
|
mean value: 0.7708007692867702
|
|
|
|
key: test_recall
|
|
value: [0.5 0.875 0.625 0.375 0.375 0.625 0.875 0.375 0.5 0.625]
|
|
|
|
mean value: 0.575
|
|
|
|
key: train_recall
|
|
value: [0.91666667 0.77777778 0.44444444 0.76388889 0.72222222 0.66666667
|
|
0.95833333 0.55555556 0.75 0.91666667]
|
|
|
|
mean value: 0.7472222222222222
|
|
|
|
key: test_roc_auc
|
|
value: [0.68 0.9075 0.7925 0.6775 0.6875 0.80229592
|
|
0.84566327 0.6875 0.72959184 0.65943878]
|
|
|
|
mean value: 0.7469489795918367
|
|
|
|
key: train_roc_auc
|
|
value: [0.91441441 0.87424925 0.7210961 0.8673048 0.85324594 0.82659176
|
|
0.89152622 0.77440699 0.86713483 0.86170412]
|
|
|
|
mean value: 0.8451674427236224
|
|
|
|
key: test_jcc
|
|
value: [0.26666667 0.63636364 0.5 0.33333333 0.375 0.55555556
|
|
0.41176471 0.375 0.4 0.2173913 ]
|
|
|
|
mean value: 0.40710752021493707
|
|
|
|
key: train_jcc
|
|
value: [0.59459459 0.65882353 0.43835616 0.64705882 0.65822785 0.61538462
|
|
0.46 0.53333333 0.6835443 0.41772152]
|
|
|
|
mean value: 0.5707044731523357
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.20487475 0.19417882 0.19340062 0.19247127 0.19269037 0.19388175
|
|
0.19496179 0.19257259 0.18974113 0.18232441]
|
|
|
|
mean value: 0.19310975074768066
|
|
|
|
key: score_time
|
|
value: [0.01691103 0.0172224 0.01682401 0.01706553 0.01686454 0.01620436
|
|
0.01650167 0.01728606 0.01521683 0.01530719]
|
|
|
|
mean value: 0.01654036045074463
|
|
|
|
key: test_mcc
|
|
value: [0.48309381 0.52084744 0.65714286 0.7678689 0.58333333 0.56377551
|
|
0.61824189 0.35514145 0.61824189 0.51760051]
|
|
|
|
mean value: 0.5685287580868678
|
|
|
|
key: train_mcc
|
|
value: [0.9259886 0.96746666 0.96771772 0.95922155 0.95923394 0.98386392
|
|
0.95927892 0.95923394 0.98380498 0.95114834]
|
|
|
|
mean value: 0.9616958559269236
|
|
|
|
key: test_accuracy
|
|
value: [0.89655172 0.89655172 0.9137931 0.94827586 0.9122807 0.89473684
|
|
0.9122807 0.85964912 0.9122807 0.87719298]
|
|
|
|
mean value: 0.9023593466424682
|
|
|
|
key: train_accuracy
|
|
value: [0.98255814 0.99224806 0.99224806 0.99031008 0.99032882 0.99613153
|
|
0.99032882 0.99032882 0.99613153 0.98839458]
|
|
|
|
mean value: 0.9909008441665542
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.57142857 0.70588235 0.76923077 0.54545455 0.625
|
|
0.66666667 0.42857143 0.66666667 0.58823529]
|
|
|
|
mean value: 0.6067136295077472
|
|
|
|
key: train_fscore
|
|
value: [0.9352518 0.97183099 0.97222222 0.96453901 0.96453901 0.98611111
|
|
0.96402878 0.96453901 0.98591549 0.95774648]
|
|
|
|
mean value: 0.9666723887895976
|
|
|
|
key: test_precision
|
|
value: [0.75 0.66666667 0.66666667 1. 1. 0.625
|
|
0.71428571 0.5 0.71428571 0.55555556]
|
|
|
|
mean value: 0.7192460317460317
|
|
|
|
key: train_precision
|
|
value: [0.97014925 0.98571429 0.97222222 0.98550725 0.98550725 0.98611111
|
|
1. 0.98550725 1. 0.97142857]
|
|
|
|
mean value: 0.9842147183337969
|
|
|
|
key: test_recall
|
|
value: [0.375 0.5 0.75 0.625 0.375 0.625 0.625 0.375 0.625 0.625]
|
|
|
|
mean value: 0.55
|
|
|
|
key: train_recall
|
|
value: [0.90277778 0.95833333 0.97222222 0.94444444 0.94444444 0.98611111
|
|
0.93055556 0.94444444 0.97222222 0.94444444]
|
|
|
|
mean value: 0.95
|
|
|
|
key: test_roc_auc
|
|
value: [0.6775 0.73 0.845 0.8125 0.6875 0.78188776
|
|
0.79209184 0.65688776 0.79209184 0.77168367]
|
|
|
|
mean value: 0.7547142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.94913664 0.97804054 0.98385886 0.9710961 0.97109863 0.99193196
|
|
0.96527778 0.97109863 0.98611111 0.96997503]
|
|
|
|
mean value: 0.9737625265715154
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.4 0.54545455 0.625 0.375 0.45454545
|
|
0.5 0.27272727 0.5 0.41666667]
|
|
|
|
mean value: 0.44227272727272726
|
|
|
|
key: train_jcc
|
|
value: [0.87837838 0.94520548 0.94594595 0.93150685 0.93150685 0.97260274
|
|
0.93055556 0.93150685 0.97222222 0.91891892]
|
|
|
|
mean value: 0.9358349788144309
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0964222 0.11988497 0.08231401 0.10838437 0.10221148 0.08588648
|
|
0.08572316 0.10263085 0.10275578 0.09148932]
|
|
|
|
mean value: 0.09777026176452637
|
|
|
|
key: score_time
|
|
value: [0.03362894 0.04247355 0.04225659 0.02943635 0.03856683 0.03235412
|
|
0.03946471 0.04030418 0.03721571 0.04162025]
|
|
|
|
mean value: 0.03773212432861328
|
|
|
|
key: test_mcc
|
|
value: [0.68502761 0.92619822 0.565 0.7678689 0.41033786 0.6103927
|
|
0.61824189 0.61824189 0.5197192 0.70918367]
|
|
|
|
mean value: 0.643021192066826
|
|
|
|
key: train_mcc
|
|
value: [0.96747708 0.92607553 0.95926692 0.98380002 0.97566179 0.96748679
|
|
0.98380498 0.96747666 0.97566179 0.95095702]
|
|
|
|
mean value: 0.9657668577677593
|
|
|
|
key: test_accuracy
|
|
value: [0.93103448 0.98275862 0.89655172 0.94827586 0.87719298 0.89473684
|
|
0.9122807 0.9122807 0.89473684 0.92982456]
|
|
|
|
mean value: 0.917967332123412
|
|
|
|
key: train_accuracy
|
|
value: [0.99224806 0.98255814 0.99031008 0.99612403 0.99419729 0.99226306
|
|
0.99613153 0.99226306 0.99419729 0.98839458]
|
|
|
|
mean value: 0.9918687118588158
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.93333333 0.625 0.76923077 0.46153846 0.66666667
|
|
0.66666667 0.66666667 0.57142857 0.75 ]
|
|
|
|
mean value: 0.6824816849816849
|
|
|
|
key: train_fscore
|
|
value: [0.97142857 0.93333333 0.96402878 0.98591549 0.9787234 0.97142857
|
|
0.98591549 0.97183099 0.9787234 0.95714286]
|
|
|
|
mean value: 0.9698470890653375
|
|
|
|
key: test_precision
|
|
value: [0.83333333 1. 0.625 1. 0.6 0.6
|
|
0.71428571 0.71428571 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7503571428571428
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 0.98571429 1. 0.98529412]
|
|
|
|
mean value: 0.9971008403361344
|
|
|
|
key: test_recall
|
|
value: [0.625 0.875 0.625 0.625 0.375 0.75 0.625 0.625 0.5 0.75 ]
|
|
|
|
mean value: 0.6375
|
|
|
|
key: train_recall
|
|
value: [0.94444444 0.875 0.93055556 0.97222222 0.95833333 0.94444444
|
|
0.97222222 0.95833333 0.95833333 0.93055556]
|
|
|
|
mean value: 0.9444444444444444
|
|
|
|
key: test_roc_auc
|
|
value: [0.8025 0.9375 0.7825 0.8125 0.66709184 0.83418367
|
|
0.79209184 0.79209184 0.72959184 0.85459184]
|
|
|
|
mean value: 0.8004642857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.97222222 0.9375 0.96527778 0.98611111 0.97916667 0.97222222
|
|
0.98611111 0.97804307 0.97916667 0.96415418]
|
|
|
|
mean value: 0.9719975031210987
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.875 0.45454545 0.625 0.3 0.5
|
|
0.5 0.5 0.4 0.6 ]
|
|
|
|
mean value: 0.531010101010101
|
|
|
|
key: train_jcc
|
|
value: [0.94444444 0.875 0.93055556 0.97222222 0.95833333 0.94444444
|
|
0.97222222 0.94520548 0.95833333 0.91780822]
|
|
|
|
mean value: 0.9418569254185692
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.18251801 0.15320945 0.20837259 0.26758099 0.22566319 0.17006564
|
|
0.18142509 0.19449568 0.22807431 0.16830635]
|
|
|
|
mean value: 0.19797112941741943
|
|
|
|
key: score_time
|
|
value: [0.03162575 0.03150916 0.02925968 0.03666377 0.02728319 0.02698755
|
|
0.03290415 0.0155654 0.01600456 0.03157401]
|
|
|
|
mean value: 0.027937722206115723
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0. -0.09341987 0. 0.33071891 0.
|
|
0. -0.07705141 0. 0. ]
|
|
|
|
mean value: 0.016024762773074908
|
|
|
|
key: train_mcc
|
|
value: [0.84018065 0.83131941 0.8577409 0.83131941 0.85777644 0.83135967
|
|
0.84021939 0.87512939 0.84021939 0.86647754]
|
|
|
|
mean value: 0.8471742188356246
|
|
|
|
key: test_accuracy
|
|
value: [0.86206897 0.86206897 0.81034483 0.86206897 0.87719298 0.85964912
|
|
0.85964912 0.8245614 0.85964912 0.85964912]
|
|
|
|
mean value: 0.8536902601330913
|
|
|
|
key: train_accuracy
|
|
value: [0.96317829 0.96124031 0.96705426 0.96124031 0.96711799 0.96131528
|
|
0.96324952 0.97098646 0.96324952 0.96905222]
|
|
|
|
mean value: 0.9647684164754922
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0.22222222 0.
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: 0.02222222222222222
|
|
|
|
key: train_fscore
|
|
value: [0.848 0.83870968 0.86614173 0.83870968 0.86614173 0.83870968
|
|
0.848 0.88372093 0.848 0.875 ]
|
|
|
|
mean value: 0.8551133427057552
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.1
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0.125 0. 0. 0. 0. 0. ]
|
|
|
|
mean value: 0.0125
|
|
|
|
key: train_recall
|
|
value: [0.73611111 0.72222222 0.76388889 0.72222222 0.76388889 0.72222222
|
|
0.73611111 0.79166667 0.73611111 0.77777778]
|
|
|
|
mean value: 0.7472222222222222
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.47 0.5 0.5625 0.5
|
|
0.5 0.47959184 0.5 0.5 ]
|
|
|
|
mean value: 0.5012091836734693
|
|
|
|
key: train_roc_auc
|
|
value: [0.86805556 0.86111111 0.88194444 0.86111111 0.88194444 0.86111111
|
|
0.86805556 0.89583333 0.86805556 0.88888889]
|
|
|
|
mean value: 0.8736111111111111
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0.125 0. 0. 0. 0. 0. ]
|
|
|
|
mean value: 0.0125
|
|
|
|
key: train_jcc
|
|
value: [0.73611111 0.72222222 0.76388889 0.72222222 0.76388889 0.72222222
|
|
0.73611111 0.79166667 0.73611111 0.77777778]
|
|
|
|
mean value: 0.7472222222222222
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.82511854 0.8043623 0.75435877 0.76228428 0.75748801 0.7663424
|
|
0.74598718 0.7539773 0.7721374 0.74886751]
|
|
|
|
mean value: 0.7690923690795899
|
|
|
|
key: score_time
|
|
value: [0.01233506 0.01007676 0.00954175 0.0094099 0.00960803 0.01258469
|
|
0.00961041 0.00993156 0.00963855 0.00943804]
|
|
|
|
mean value: 0.01021747589111328
|
|
|
|
key: test_mcc
|
|
value: [0.565 0.855 0.65714286 0.68502761 0.58333333 0.6103927
|
|
0.70918367 0.77212742 0.61824189 0.56377551]
|
|
|
|
mean value: 0.6619224988162519
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.99191739 1. 1. 1. ]
|
|
|
|
mean value: 0.9991917385497009
|
|
|
|
key: test_accuracy
|
|
value: [0.89655172 0.96551724 0.9137931 0.93103448 0.9122807 0.89473684
|
|
0.92982456 0.94736842 0.9122807 0.89473684]
|
|
|
|
mean value: 0.9198124621899576
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.99806576 1. 1. 1. ]
|
|
|
|
mean value: 0.9998065764023211
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.875 0.70588235 0.71428571 0.54545455 0.66666667
|
|
0.75 0.8 0.66666667 0.625 ]
|
|
|
|
mean value: 0.6973955946014769
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.99300699 1. 1. 1. ]
|
|
|
|
mean value: 0.9993006993006993
|
|
|
|
key: test_precision
|
|
value: [0.625 0.875 0.66666667 0.83333333 1. 0.6
|
|
0.75 0.85714286 0.71428571 0.625 ]
|
|
|
|
mean value: 0.7546428571428572
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.625 0.875 0.75 0.625 0.375 0.75 0.75 0.75 0.625 0.625]
|
|
|
|
mean value: 0.675
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.98611111 1. 1. 1. ]
|
|
|
|
mean value: 0.9986111111111111
|
|
|
|
key: test_roc_auc
|
|
value: [0.7825 0.9275 0.845 0.8025 0.6875 0.83418367
|
|
0.85459184 0.86479592 0.79209184 0.78188776]
|
|
|
|
mean value: 0.8172551020408163
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.99305556 1. 1. 1. ]
|
|
|
|
mean value: 0.9993055555555556
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.77777778 0.54545455 0.55555556 0.375 0.5
|
|
0.6 0.66666667 0.5 0.45454545]
|
|
|
|
mean value: 0.5429545454545455
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.98611111 1. 1. 1. ]
|
|
|
|
mean value: 0.9986111111111111
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0338757 0.03136754 0.03357816 0.05422306 0.0376687 0.03553629
|
|
0.03695703 0.03933406 0.0856638 0.05868649]
|
|
|
|
mean value: 0.044689083099365236
|
|
|
|
key: score_time
|
|
value: [0.01247787 0.01441693 0.01502013 0.01263857 0.0131197 0.01694751
|
|
0.01325035 0.01476479 0.02269197 0.01454806]
|
|
|
|
mean value: 0.014987587928771973
|
|
|
|
key: test_mcc
|
|
value: [-0.09341987 0. -0.10886621 0.13234482 -0.0952381 -0.07705141
|
|
-0.0952381 -0.21963032 -0.0952381 -0.0952381 ]
|
|
|
|
mean value: -0.0747575375897841
|
|
|
|
key: train_mcc
|
|
value: [0.15490259 0.10942628 0.18990097 0.21949279 0.10944323 0.1549263
|
|
0.1549263 0.21952555 0.10944323 0.10944323]
|
|
|
|
mean value: 0.15314304567449927
|
|
|
|
key: test_accuracy
|
|
value: [0.81034483 0.86206897 0.79310345 0.84482759 0.80701754 0.8245614
|
|
0.80701754 0.63157895 0.80701754 0.80701754]
|
|
|
|
mean value: 0.7994555353901996
|
|
|
|
key: train_accuracy
|
|
value: [0.86434109 0.8624031 0.86627907 0.86821705 0.86266925 0.86460348
|
|
0.86460348 0.86847195 0.86266925 0.86266925]
|
|
|
|
mean value: 0.864692696384928
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0.18181818 0. 0.
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: 0.01818181818181818
|
|
|
|
key: train_fscore
|
|
value: [0.05405405 0.02739726 0.08 0.10526316 0.02739726 0.05405405
|
|
0.05405405 0.10526316 0.02739726 0.02739726]
|
|
|
|
mean value: 0.056227751904752626
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0.33333333 0. 0.
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: 0.03333333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0.125 0. 0. 0. 0. 0. 0. ]
|
|
|
|
mean value: 0.0125
|
|
|
|
key: train_recall
|
|
value: [0.02777778 0.01388889 0.04166667 0.05555556 0.01388889 0.02777778
|
|
0.02777778 0.05555556 0.01388889 0.01388889]
|
|
|
|
mean value: 0.029166666666666664
|
|
|
|
key: test_roc_auc
|
|
value: [0.47 0.5 0.46 0.5425 0.46938776 0.47959184
|
|
0.46938776 0.36734694 0.46938776 0.46938776]
|
|
|
|
mean value: 0.4696989795918367
|
|
|
|
key: train_roc_auc
|
|
value: [0.51388889 0.50694444 0.52083333 0.52777778 0.50694444 0.51388889
|
|
0.51388889 0.52777778 0.50694444 0.50694444]
|
|
|
|
mean value: 0.5145833333333333
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0.1 0. 0. 0. 0. 0. 0. ]
|
|
|
|
mean value: 0.01
|
|
|
|
key: train_jcc
|
|
value: [0.02777778 0.01388889 0.04166667 0.05555556 0.01388889 0.02777778
|
|
0.02777778 0.05555556 0.01388889 0.01388889]
|
|
|
|
mean value: 0.029166666666666664
|
|
|
|
MCC on Blind test: 0.02
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01792407 0.01629615 0.01605296 0.03605795 0.01615262 0.02317548
|
|
0.02347136 0.04255676 0.05212307 0.03821516]
|
|
|
|
mean value: 0.02820255756378174
|
|
|
|
key: score_time
|
|
value: [0.02907062 0.01406908 0.01210213 0.01898623 0.01239657 0.01225352
|
|
0.01550245 0.01889801 0.01899934 0.02266169]
|
|
|
|
mean value: 0.01749396324157715
|
|
|
|
key: test_mcc
|
|
value: [0.35810951 0.77271394 0.65714286 0.33113309 0.4719399 0.76742577
|
|
0.61824189 0.48217405 0.58888181 0.6799001 ]
|
|
|
|
mean value: 0.572766293012361
|
|
|
|
key: train_mcc
|
|
value: [0.74743955 0.66948865 0.71799991 0.72780333 0.7100299 0.72876544
|
|
0.70906519 0.74064358 0.74750098 0.71895922]
|
|
|
|
mean value: 0.7217695750772882
|
|
|
|
key: test_accuracy
|
|
value: [0.87931034 0.94827586 0.9137931 0.87931034 0.89473684 0.94736842
|
|
0.9122807 0.89473684 0.9122807 0.92982456]
|
|
|
|
mean value: 0.9111917725347852
|
|
|
|
key: train_accuracy
|
|
value: [0.94379845 0.92829457 0.9379845 0.93992248 0.93617021 0.94003868
|
|
0.93617021 0.94197292 0.94390716 0.93810445]
|
|
|
|
mean value: 0.9386363636363636
|
|
|
|
key: test_fscore
|
|
value: [0.36363636 0.8 0.70588235 0.22222222 0.4 0.76923077
|
|
0.66666667 0.5 0.61538462 0.66666667]
|
|
|
|
mean value: 0.570968965674848
|
|
|
|
key: train_fscore
|
|
value: [0.76422764 0.68907563 0.72881356 0.7394958 0.73170732 0.74796748
|
|
0.72727273 0.765625 0.76422764 0.73770492]
|
|
|
|
mean value: 0.7396117714499789
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.85714286 0.66666667 1. 1. 1.
|
|
0.71428571 0.75 0.8 1. ]
|
|
|
|
mean value: 0.8454761904761905
|
|
|
|
key: train_precision
|
|
value: [0.92156863 0.87234043 0.93478261 0.93617021 0.88235294 0.90196078
|
|
0.89795918 0.875 0.92156863 0.9 ]
|
|
|
|
mean value: 0.9043703411059151
|
|
|
|
key: test_recall
|
|
value: [0.25 0.75 0.75 0.125 0.25 0.625 0.625 0.375 0.5 0.5 ]
|
|
|
|
mean value: 0.475
|
|
|
|
key: train_recall
|
|
value: [0.65277778 0.56944444 0.59722222 0.61111111 0.625 0.63888889
|
|
0.61111111 0.68055556 0.65277778 0.625 ]
|
|
|
|
mean value: 0.6263888888888889
|
|
|
|
key: test_roc_auc
|
|
value: [0.615 0.865 0.845 0.5625 0.625 0.8125
|
|
0.79209184 0.67729592 0.73979592 0.75 ]
|
|
|
|
mean value: 0.7284183673469388
|
|
|
|
key: train_roc_auc
|
|
value: [0.82188438 0.77796547 0.79523273 0.80217718 0.80575843 0.81382647
|
|
0.79993758 0.83241261 0.82189451 0.80688202]
|
|
|
|
mean value: 0.807797137024665
|
|
|
|
key: test_jcc
|
|
value: [0.22222222 0.66666667 0.54545455 0.125 0.25 0.625
|
|
0.5 0.33333333 0.44444444 0.5 ]
|
|
|
|
mean value: 0.4212121212121212
|
|
|
|
key: train_jcc
|
|
value: [0.61842105 0.52564103 0.57333333 0.58666667 0.57692308 0.5974026
|
|
0.57142857 0.62025316 0.61842105 0.58441558]
|
|
|
|
mean value: 0.5872906125630976
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.34602094 0.4119401 0.34352207 0.31059027 0.29616928 0.30965281
|
|
0.30067015 0.42204738 0.2247901 0.29215574]
|
|
|
|
mean value: 0.3257558822631836
|
|
|
|
key: score_time
|
|
value: [0.02013493 0.0188508 0.02149749 0.01887989 0.01892638 0.01915336
|
|
0.0191896 0.01900268 0.03404069 0.01908517]
|
|
|
|
mean value: 0.020876097679138183
|
|
|
|
key: test_mcc
|
|
value: [0.35810951 0.77271394 0.65714286 0.33113309 0.4719399 0.76742577
|
|
0.61824189 0.48217405 0.58888181 0.6799001 ]
|
|
|
|
mean value: 0.572766293012361
|
|
|
|
key: train_mcc
|
|
value: [0.74743955 0.66948865 0.71799991 0.72780333 0.7100299 0.72876544
|
|
0.70906519 0.74064358 0.74750098 0.71895922]
|
|
|
|
mean value: 0.7217695750772882
|
|
|
|
key: test_accuracy
|
|
value: [0.87931034 0.94827586 0.9137931 0.87931034 0.89473684 0.94736842
|
|
0.9122807 0.89473684 0.9122807 0.92982456]
|
|
|
|
mean value: 0.9111917725347852
|
|
|
|
key: train_accuracy
|
|
value: [0.94379845 0.92829457 0.9379845 0.93992248 0.93617021 0.94003868
|
|
0.93617021 0.94197292 0.94390716 0.93810445]
|
|
|
|
mean value: 0.9386363636363636
|
|
|
|
key: test_fscore
|
|
value: [0.36363636 0.8 0.70588235 0.22222222 0.4 0.76923077
|
|
0.66666667 0.5 0.61538462 0.66666667]
|
|
|
|
mean value: 0.570968965674848
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./embb_cd_7030.py:115: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_cd_7030.py:118: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.76422764 0.68907563 0.72881356 0.7394958 0.73170732 0.74796748
|
|
0.72727273 0.765625 0.76422764 0.73770492]
|
|
|
|
mean value: 0.7396117714499789
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.85714286 0.66666667 1. 1. 1.
|
|
0.71428571 0.75 0.8 1. ]
|
|
|
|
mean value: 0.8454761904761905
|
|
|
|
key: train_precision
|
|
value: [0.92156863 0.87234043 0.93478261 0.93617021 0.88235294 0.90196078
|
|
0.89795918 0.875 0.92156863 0.9 ]
|
|
|
|
mean value: 0.9043703411059151
|
|
|
|
key: test_recall
|
|
value: [0.25 0.75 0.75 0.125 0.25 0.625 0.625 0.375 0.5 0.5 ]
|
|
|
|
mean value: 0.475
|
|
|
|
key: train_recall
|
|
value: [0.65277778 0.56944444 0.59722222 0.61111111 0.625 0.63888889
|
|
0.61111111 0.68055556 0.65277778 0.625 ]
|
|
|
|
mean value: 0.6263888888888889
|
|
|
|
key: test_roc_auc
|
|
value: [0.615 0.865 0.845 0.5625 0.625 0.8125
|
|
0.79209184 0.67729592 0.73979592 0.75 ]
|
|
|
|
mean value: 0.7284183673469388
|
|
|
|
key: train_roc_auc
|
|
value: [0.82188438 0.77796547 0.79523273 0.80217718 0.80575843 0.81382647
|
|
0.79993758 0.83241261 0.82189451 0.80688202]
|
|
|
|
mean value: 0.807797137024665
|
|
|
|
key: test_jcc
|
|
value: [0.22222222 0.66666667 0.54545455 0.125 0.25 0.625
|
|
0.5 0.33333333 0.44444444 0.5 ]
|
|
|
|
mean value: 0.4212121212121212
|
|
|
|
key: train_jcc
|
|
value: [0.61842105 0.52564103 0.57333333 0.58666667 0.57692308 0.5974026
|
|
0.57142857 0.62025316 0.61842105 0.58441558]
|
|
|
|
mean value: 0.5872906125630976
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04096818 0.04433274 0.04302597 0.04352307 0.04361558 0.0465672
|
|
0.0442369 0.05012655 0.0439167 0.04356551]
|
|
|
|
mean value: 0.04438784122467041
|
|
|
|
key: score_time
|
|
value: [0.01493621 0.01637697 0.01268125 0.01615691 0.01562548 0.01597953
|
|
0.01594853 0.01640224 0.01662183 0.01266122]
|
|
|
|
mean value: 0.015339016914367676
|
|
|
|
key: test_mcc
|
|
value: [0.88543774 0.85877551 0.76023142 0.87954274 0.7793222 0.80041656
|
|
0.885171 0.84930737 0.85732141 0.79582243]
|
|
|
|
mean value: 0.8351348390829758
|
|
|
|
key: train_mcc
|
|
value: [0.87706105 0.8850239 0.89545662 0.89116747 0.87509498 0.88027159
|
|
0.88195302 0.87023584 0.89390612 0.88893422]
|
|
|
|
mean value: 0.883910479878775
|
|
|
|
key: test_accuracy
|
|
value: [0.93939394 0.92929293 0.87878788 0.93939394 0.88888889 0.8989899
|
|
0.93939394 0.91919192 0.92857143 0.8877551 ]
|
|
|
|
mean value: 0.9149659863945578
|
|
|
|
key: train_accuracy
|
|
value: [0.93813273 0.94150731 0.94713161 0.94488189 0.93700787 0.93925759
|
|
0.94038245 0.93475816 0.94606742 0.94382022]
|
|
|
|
mean value: 0.9412947257997245
|
|
|
|
key: test_fscore
|
|
value: [0.94230769 0.92929293 0.88235294 0.94 0.88659794 0.90384615
|
|
0.94339623 0.92592593 0.92929293 0.89908257]
|
|
|
|
mean value: 0.9182095305208865
|
|
|
|
key: train_fscore
|
|
value: [0.93949395 0.94347826 0.94852136 0.94644809 0.93846154 0.94104803
|
|
0.94182217 0.93598234 0.94771242 0.9452954 ]
|
|
|
|
mean value: 0.942826356575844
|
|
|
|
key: test_precision
|
|
value: [0.89090909 0.92 0.8490566 0.92156863 0.91489362 0.87037037
|
|
0.89285714 0.86206897 0.92 0.81666667]
|
|
|
|
mean value: 0.8858391084566354
|
|
|
|
key: train_precision
|
|
value: [0.92025862 0.91368421 0.92521368 0.9212766 0.91630901 0.91313559
|
|
0.91862955 0.91774892 0.91966173 0.92110874]
|
|
|
|
mean value: 0.9187026651959805
|
|
|
|
key: test_recall
|
|
value: [1. 0.93877551 0.91836735 0.95918367 0.86 0.94
|
|
1. 1. 0.93877551 1. ]
|
|
|
|
mean value: 0.9555102040816327
|
|
|
|
key: train_recall
|
|
value: [0.95955056 0.9752809 0.97303371 0.97303371 0.96171171 0.97072072
|
|
0.96621622 0.95495495 0.97752809 0.97078652]
|
|
|
|
mean value: 0.9682817086749671
|
|
|
|
key: test_roc_auc
|
|
value: [0.94 0.92938776 0.87918367 0.93959184 0.88918367 0.89857143
|
|
0.93877551 0.91836735 0.92857143 0.8877551 ]
|
|
|
|
mean value: 0.9149387755102041
|
|
|
|
key: train_roc_auc
|
|
value: [0.93810861 0.94146928 0.94710244 0.94485019 0.93703563 0.93929294
|
|
0.94041148 0.93478085 0.94606742 0.94382022]
|
|
|
|
mean value: 0.9412939062658163
|
|
|
|
key: test_jcc
|
|
value: [0.89090909 0.86792453 0.78947368 0.88679245 0.7962963 0.8245614
|
|
0.89285714 0.86206897 0.86792453 0.81666667]
|
|
|
|
mean value: 0.8495474759399698
|
|
|
|
key: train_jcc
|
|
value: [0.88589212 0.89300412 0.90208333 0.89834025 0.88405797 0.88865979
|
|
0.89004149 0.87966805 0.90062112 0.89626556]
|
|
|
|
mean value: 0.8918633800280686
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.15772343 1.38369155 1.41796398 1.48340511 1.65042877 1.3559742
|
|
1.41186523 1.45863152 1.25694275 1.337152 ]
|
|
|
|
mean value: 1.391377854347229
|
|
|
|
key: score_time
|
|
value: [0.01506114 0.01268411 0.01565075 0.01573563 0.01261926 0.01576948
|
|
0.01900387 0.01586056 0.01579022 0.01528335]
|
|
|
|
mean value: 0.01534583568572998
|
|
|
|
key: test_mcc
|
|
value: [0.96039208 0.85877551 0.78594951 0.91996741 0.89918367 0.77905308
|
|
0.92213889 0.86710997 0.87755102 0.86164044]
|
|
|
|
mean value: 0.8731761578968935
|
|
|
|
key: train_mcc
|
|
value: [0.94827761 0.9483731 0.94831575 0.95059198 0.95281672 0.93735133
|
|
0.93937508 0.93487516 0.9508019 0.94610564]
|
|
|
|
mean value: 0.9456884256354796
|
|
|
|
key: test_accuracy
|
|
value: [0.97979798 0.92929293 0.88888889 0.95959596 0.94949495 0.88888889
|
|
0.95959596 0.92929293 0.93877551 0.92857143]
|
|
|
|
mean value: 0.9352195423623995
|
|
|
|
key: train_accuracy
|
|
value: [0.97412823 0.97412823 0.97412823 0.97525309 0.97637795 0.96850394
|
|
0.9696288 0.96737908 0.9752809 0.97303371]
|
|
|
|
mean value: 0.9727842165796691
|
|
|
|
key: test_fscore
|
|
value: [0.98 0.92929293 0.8952381 0.96 0.94949495 0.89320388
|
|
0.96153846 0.93457944 0.93877551 0.93203883]
|
|
|
|
mean value: 0.9374162103467455
|
|
|
|
key: train_fscore
|
|
value: [0.97424412 0.97435897 0.97430168 0.97544643 0.97648376 0.96888889
|
|
0.9698324 0.96759777 0.97555556 0.97315436]
|
|
|
|
mean value: 0.9729863936905008
|
|
|
|
key: test_precision
|
|
value: [0.96078431 0.92 0.83928571 0.94117647 0.95918367 0.86792453
|
|
0.92592593 0.87719298 0.93877551 0.88888889]
|
|
|
|
mean value: 0.9119138007845751
|
|
|
|
key: train_precision
|
|
value: [0.97098214 0.96681416 0.96888889 0.96895787 0.97104677 0.95614035
|
|
0.96230599 0.96008869 0.96483516 0.9688196 ]
|
|
|
|
mean value: 0.9658879626350028
|
|
|
|
key: test_recall
|
|
value: [1. 0.93877551 0.95918367 0.97959184 0.94 0.92
|
|
1. 1. 0.93877551 0.97959184]
|
|
|
|
mean value: 0.9655918367346938
|
|
|
|
key: train_recall
|
|
value: [0.97752809 0.98202247 0.97977528 0.98202247 0.98198198 0.98198198
|
|
0.97747748 0.97522523 0.98651685 0.97752809]
|
|
|
|
mean value: 0.9802059925093632
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.92938776 0.88959184 0.95979592 0.94959184 0.88857143
|
|
0.95918367 0.92857143 0.93877551 0.92857143]
|
|
|
|
mean value: 0.9352040816326531
|
|
|
|
key: train_roc_auc
|
|
value: [0.97412441 0.97411934 0.97412187 0.97524547 0.97638425 0.96851908
|
|
0.96963762 0.96738789 0.9752809 0.97303371]
|
|
|
|
mean value: 0.9727854539933192
|
|
|
|
key: test_jcc
|
|
value: [0.96078431 0.86792453 0.81034483 0.92307692 0.90384615 0.80701754
|
|
0.92592593 0.87719298 0.88461538 0.87272727]
|
|
|
|
mean value: 0.8833455856121033
|
|
|
|
key: train_jcc
|
|
value: [0.94978166 0.95 0.94989107 0.95206972 0.95404814 0.93965517
|
|
0.94143167 0.93722944 0.95227766 0.94771242]
|
|
|
|
mean value: 0.9474096939238826
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01670694 0.0122323 0.01200414 0.01167989 0.01203704 0.011657
|
|
0.01165462 0.01166058 0.0116787 0.01182604]
|
|
|
|
mean value: 0.01231372356414795
|
|
|
|
key: score_time
|
|
value: [0.01283312 0.0097065 0.0093565 0.00923562 0.00938773 0.0093236
|
|
0.00929379 0.00930882 0.00924587 0.00933671]
|
|
|
|
mean value: 0.009702825546264648
|
|
|
|
key: test_mcc
|
|
value: [0.61967734 0.69701925 0.63699474 0.61890291 0.61616992 0.74087648
|
|
0.61702314 0.76289389 0.63477162 0.57250257]
|
|
|
|
mean value: 0.6516831859688538
|
|
|
|
key: train_mcc
|
|
value: [0.66988734 0.67717836 0.67285383 0.67606418 0.68389171 0.66353003
|
|
0.67005662 0.66787818 0.67015211 0.68840742]
|
|
|
|
mean value: 0.673989976845721
|
|
|
|
key: test_accuracy
|
|
value: [0.80808081 0.84848485 0.81818182 0.80808081 0.80808081 0.86868687
|
|
0.80808081 0.87878788 0.81632653 0.78571429]
|
|
|
|
mean value: 0.8248505462791177
|
|
|
|
key: train_accuracy
|
|
value: [0.83464567 0.83802025 0.83577053 0.83802025 0.84139483 0.83127109
|
|
0.83464567 0.83352081 0.83483146 0.84382022]
|
|
|
|
mean value: 0.836594077425715
|
|
|
|
key: test_fscore
|
|
value: [0.81553398 0.84536082 0.82 0.79569892 0.81188119 0.87619048
|
|
0.81553398 0.88679245 0.82352941 0.79207921]
|
|
|
|
mean value: 0.8282600447463474
|
|
|
|
key: train_fscore
|
|
value: [0.83828383 0.84279476 0.84095861 0.83892617 0.84556407 0.83552632
|
|
0.83828383 0.83736264 0.83792723 0.84742042]
|
|
|
|
mean value: 0.8403047874143085
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.85416667 0.80392157 0.84090909 0.80392157 0.83636364
|
|
0.79245283 0.83928571 0.79245283 0.76923077]
|
|
|
|
mean value: 0.8110482452865916
|
|
|
|
key: train_precision
|
|
value: [0.82112069 0.81953291 0.81606765 0.83518931 0.82302772 0.81410256
|
|
0.81935484 0.81759657 0.82251082 0.82832618]
|
|
|
|
mean value: 0.8216829251868135
|
|
|
|
key: test_recall
|
|
value: [0.85714286 0.83673469 0.83673469 0.75510204 0.82 0.92
|
|
0.84 0.94 0.85714286 0.81632653]
|
|
|
|
mean value: 0.8479183673469388
|
|
|
|
key: train_recall
|
|
value: [0.85617978 0.86741573 0.86741573 0.84269663 0.86936937 0.85810811
|
|
0.85810811 0.85810811 0.85393258 0.86741573]
|
|
|
|
mean value: 0.8598749873468975
|
|
|
|
key: test_roc_auc
|
|
value: [0.80857143 0.84836735 0.81836735 0.80755102 0.80795918 0.86816327
|
|
0.8077551 0.87816327 0.81632653 0.78571429]
|
|
|
|
mean value: 0.8246938775510204
|
|
|
|
key: train_roc_auc
|
|
value: [0.83462142 0.83798714 0.83573489 0.83801498 0.84142626 0.83130125
|
|
0.83467203 0.83354844 0.83483146 0.84382022]
|
|
|
|
mean value: 0.8365958092924385
|
|
|
|
key: test_jcc
|
|
value: [0.68852459 0.73214286 0.69491525 0.66071429 0.68333333 0.77966102
|
|
0.68852459 0.79661017 0.7 0.6557377 ]
|
|
|
|
mean value: 0.7080163802114344
|
|
|
|
key: train_jcc
|
|
value: [0.72159091 0.72830189 0.72556391 0.72254335 0.73244782 0.71751412
|
|
0.72159091 0.72022684 0.72106262 0.7352381 ]
|
|
|
|
mean value: 0.7246080466414571
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01235771 0.016433 0.01653194 0.0165143 0.01693535 0.01728439
|
|
0.01711226 0.01644254 0.01671052 0.01635647]
|
|
|
|
mean value: 0.016267848014831544
|
|
|
|
key: score_time
|
|
value: [0.01219749 0.01242304 0.01246047 0.01245522 0.01254535 0.01279736
|
|
0.01249051 0.01241422 0.01243734 0.01243448]
|
|
|
|
mean value: 0.012465548515319825
|
|
|
|
key: test_mcc
|
|
value: [0.6862556 0.61967734 0.68214847 0.61616992 0.68137582 0.58471775
|
|
0.74940354 0.72626138 0.73607474 0.6261586 ]
|
|
|
|
mean value: 0.6708243147038132
|
|
|
|
key: train_mcc
|
|
value: [0.68515739 0.70883768 0.68152428 0.69917607 0.6912205 0.69511939
|
|
0.68367001 0.68730629 0.6851191 0.69257007]
|
|
|
|
mean value: 0.6909700772648195
|
|
|
|
key: test_accuracy
|
|
value: [0.83838384 0.80808081 0.83838384 0.80808081 0.83838384 0.78787879
|
|
0.86868687 0.85858586 0.86734694 0.79591837]
|
|
|
|
mean value: 0.8309729952587095
|
|
|
|
key: train_accuracy
|
|
value: [0.83914511 0.8503937 0.83689539 0.84701912 0.84139483 0.8447694
|
|
0.83802025 0.84026997 0.83932584 0.84382022]
|
|
|
|
mean value: 0.8421053828945539
|
|
|
|
key: test_fscore
|
|
value: [0.8490566 0.81553398 0.84615385 0.80412371 0.8490566 0.80733945
|
|
0.88073394 0.87037037 0.87128713 0.8245614 ]
|
|
|
|
mean value: 0.8418217042711172
|
|
|
|
key: train_fscore
|
|
value: [0.84994753 0.86102403 0.84848485 0.8559322 0.85266458 0.85381356
|
|
0.8490566 0.85052632 0.84963197 0.85259809]
|
|
|
|
mean value: 0.8523679732652452
|
|
|
|
key: test_precision
|
|
value: [0.78947368 0.77777778 0.8 0.8125 0.80357143 0.74576271
|
|
0.81355932 0.81034483 0.84615385 0.72307692]
|
|
|
|
mean value: 0.7922220521275014
|
|
|
|
key: train_precision
|
|
value: [0.79724409 0.8046875 0.79296875 0.80961924 0.79532164 0.806
|
|
0.79411765 0.79841897 0.79841897 0.80722892]
|
|
|
|
mean value: 0.800402572777755
|
|
|
|
key: test_recall
|
|
value: [0.91836735 0.85714286 0.89795918 0.79591837 0.9 0.88
|
|
0.96 0.94 0.89795918 0.95918367]
|
|
|
|
mean value: 0.9006530612244897
|
|
|
|
key: train_recall
|
|
value: [0.91011236 0.9258427 0.91235955 0.90786517 0.91891892 0.90765766
|
|
0.91216216 0.90990991 0.90786517 0.90337079]
|
|
|
|
mean value: 0.9116064378985728
|
|
|
|
key: test_roc_auc
|
|
value: [0.83918367 0.80857143 0.83897959 0.80795918 0.8377551 0.78693878
|
|
0.8677551 0.8577551 0.86734694 0.79591837]
|
|
|
|
mean value: 0.8308163265306122
|
|
|
|
key: train_roc_auc
|
|
value: [0.83906519 0.85030874 0.83681041 0.8469506 0.84148193 0.84484006
|
|
0.83810355 0.84034821 0.83932584 0.84382022]
|
|
|
|
mean value: 0.8421054762627797
|
|
|
|
key: test_jcc
|
|
value: [0.73770492 0.68852459 0.73333333 0.67241379 0.73770492 0.67692308
|
|
0.78688525 0.7704918 0.77192982 0.70149254]
|
|
|
|
mean value: 0.7277404040644531
|
|
|
|
key: train_jcc
|
|
value: [0.73905109 0.7559633 0.73684211 0.74814815 0.7431694 0.74491682
|
|
0.73770492 0.73992674 0.73857404 0.74306839]
|
|
|
|
mean value: 0.7427364960709436
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01498485 0.012537 0.01221442 0.01222658 0.01210451 0.01211619
|
|
0.01221776 0.01238251 0.01112866 0.01199746]
|
|
|
|
mean value: 0.012390995025634765
|
|
|
|
key: score_time
|
|
value: [0.04104924 0.01468563 0.01498485 0.01763797 0.01504183 0.01545286
|
|
0.01515865 0.01949358 0.01675677 0.01542091]
|
|
|
|
mean value: 0.01856822967529297
|
|
|
|
key: test_mcc
|
|
value: [0.88175388 0.78197378 0.74519047 0.70443143 0.82219219 0.86006806
|
|
0.74087648 0.7035978 0.79591837 0.73854895]
|
|
|
|
mean value: 0.7774551412914182
|
|
|
|
key: train_mcc
|
|
value: [0.83144273 0.84681927 0.85725971 0.8399232 0.8314714 0.85163105
|
|
0.84260802 0.86373865 0.84276996 0.85100607]
|
|
|
|
mean value: 0.8458670062920586
|
|
|
|
key: test_accuracy
|
|
value: [0.93939394 0.88888889 0.86868687 0.84848485 0.90909091 0.92929293
|
|
0.86868687 0.84848485 0.89795918 0.86734694]
|
|
|
|
mean value: 0.8866316223459081
|
|
|
|
key: train_accuracy
|
|
value: [0.91451069 0.9223847 0.928009 0.91901012 0.91451069 0.92463442
|
|
0.92013498 0.93138358 0.92022472 0.9247191 ]
|
|
|
|
mean value: 0.9219521997952503
|
|
|
|
key: test_fscore
|
|
value: [0.94117647 0.89320388 0.87619048 0.85714286 0.91428571 0.93203883
|
|
0.87619048 0.85981308 0.89795918 0.87378641]
|
|
|
|
mean value: 0.892178738839697
|
|
|
|
key: train_fscore
|
|
value: [0.91774892 0.92508143 0.92997812 0.92173913 0.9175705 0.92725299
|
|
0.92290988 0.93289329 0.92307692 0.92693566]
|
|
|
|
mean value: 0.9245186837101238
|
|
|
|
key: test_precision
|
|
value: [0.90566038 0.85185185 0.82142857 0.80357143 0.87272727 0.90566038
|
|
0.83636364 0.80701754 0.89795918 0.83333333]
|
|
|
|
mean value: 0.8535573576526194
|
|
|
|
key: train_precision
|
|
value: [0.88517745 0.89495798 0.90618337 0.89263158 0.88493724 0.8951782
|
|
0.89098532 0.91182796 0.89121339 0.90042373]
|
|
|
|
mean value: 0.895351621946817
|
|
|
|
key: test_recall
|
|
value: [0.97959184 0.93877551 0.93877551 0.91836735 0.96 0.96
|
|
0.92 0.92 0.89795918 0.91836735]
|
|
|
|
mean value: 0.9351836734693878
|
|
|
|
key: train_recall
|
|
value: [0.95280899 0.95730337 0.95505618 0.95280899 0.9527027 0.96171171
|
|
0.95720721 0.95495495 0.95730337 0.95505618]
|
|
|
|
mean value: 0.9556913655228262
|
|
|
|
key: test_roc_auc
|
|
value: [0.93979592 0.88938776 0.86938776 0.84918367 0.90857143 0.92897959
|
|
0.86816327 0.8477551 0.89795918 0.86734694]
|
|
|
|
mean value: 0.8866530612244898
|
|
|
|
key: train_roc_auc
|
|
value: [0.91446756 0.92234538 0.92797854 0.91897206 0.9145536 0.92467608
|
|
0.92017664 0.93141006 0.92022472 0.9247191 ]
|
|
|
|
mean value: 0.9219523737220366
|
|
|
|
key: test_jcc
|
|
value: [0.88888889 0.80701754 0.77966102 0.75 0.84210526 0.87272727
|
|
0.77966102 0.75409836 0.81481481 0.77586207]
|
|
|
|
mean value: 0.806483624696808
|
|
|
|
key: train_jcc
|
|
value: [0.848 0.86060606 0.86912065 0.85483871 0.84769539 0.86437247
|
|
0.85685484 0.8742268 0.85714286 0.86382114]
|
|
|
|
mean value: 0.8596678923285026
|
|
|
|
MCC on Blind test: 0.36
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05112791 0.05271435 0.04695916 0.04676914 0.04442358 0.05239367
|
|
0.0524261 0.05252886 0.04696584 0.04334927]
|
|
|
|
mean value: 0.04896578788757324
|
|
|
|
key: score_time
|
|
value: [0.01946115 0.01765037 0.01746798 0.017416 0.01768327 0.01963854
|
|
0.01943851 0.0194993 0.01699495 0.01698184]
|
|
|
|
mean value: 0.01822319030761719
|
|
|
|
key: test_mcc
|
|
value: [0.90369611 0.78197378 0.80888973 0.81836735 0.86023767 0.72213485
|
|
0.84441196 0.84930737 0.83673469 0.75393703]
|
|
|
|
mean value: 0.8179690556099021
|
|
|
|
key: train_mcc
|
|
value: [0.86247355 0.87149369 0.89116747 0.86818268 0.8805317 0.87841352
|
|
0.88027159 0.87390345 0.86868505 0.87642627]
|
|
|
|
mean value: 0.8751548976845608
|
|
|
|
key: test_accuracy
|
|
value: [0.94949495 0.88888889 0.8989899 0.90909091 0.92929293 0.85858586
|
|
0.91919192 0.91919192 0.91836735 0.86734694]
|
|
|
|
mean value: 0.9058441558441559
|
|
|
|
key: train_accuracy
|
|
value: [0.93025872 0.93475816 0.94488189 0.93250844 0.93925759 0.93813273
|
|
0.93925759 0.93588301 0.93258427 0.93707865]
|
|
|
|
mean value: 0.9364601054081723
|
|
|
|
key: test_fscore
|
|
value: [0.95145631 0.89320388 0.90566038 0.90909091 0.92783505 0.86792453
|
|
0.9245283 0.92592593 0.91836735 0.88073394]
|
|
|
|
mean value: 0.9104726580178057
|
|
|
|
key: train_fscore
|
|
value: [0.9326087 0.93695652 0.94644809 0.93534483 0.94117647 0.94015234
|
|
0.94104803 0.93797606 0.93548387 0.93926247]
|
|
|
|
mean value: 0.9386457382219968
|
|
|
|
key: test_precision
|
|
value: [0.90740741 0.85185185 0.84210526 0.9 0.95744681 0.82142857
|
|
0.875 0.86206897 0.91836735 0.8 ]
|
|
|
|
mean value: 0.873567621481238
|
|
|
|
key: train_precision
|
|
value: [0.90315789 0.90736842 0.9212766 0.89855072 0.91139241 0.90947368
|
|
0.91313559 0.90736842 0.89690722 0.90775681]
|
|
|
|
mean value: 0.907638776963066
|
|
|
|
key: test_recall
|
|
value: [1. 0.93877551 0.97959184 0.91836735 0.9 0.92
|
|
0.98 1. 0.91836735 0.97959184]
|
|
|
|
mean value: 0.953469387755102
|
|
|
|
key: train_recall
|
|
value: [0.96404494 0.96853933 0.97303371 0.9752809 0.97297297 0.97297297
|
|
0.97072072 0.97072072 0.97752809 0.97303371]
|
|
|
|
mean value: 0.971884806154469
|
|
|
|
key: test_roc_auc
|
|
value: [0.95 0.88938776 0.89979592 0.90918367 0.92959184 0.85795918
|
|
0.91857143 0.91836735 0.91836735 0.86734694]
|
|
|
|
mean value: 0.9058571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [0.93022067 0.93472011 0.94485019 0.93246027 0.93929548 0.93817188
|
|
0.93929294 0.93592216 0.93258427 0.93707865]
|
|
|
|
mean value: 0.9364596619091001
|
|
|
|
key: test_jcc
|
|
value: [0.90740741 0.80701754 0.82758621 0.83333333 0.86538462 0.76666667
|
|
0.85964912 0.86206897 0.8490566 0.78688525]
|
|
|
|
mean value: 0.8365055711547706
|
|
|
|
key: train_jcc
|
|
value: [0.87372709 0.88139059 0.89834025 0.87854251 0.88888889 0.88706366
|
|
0.88865979 0.88319672 0.87878788 0.88548057]
|
|
|
|
mean value: 0.8844077950138136
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.7973175 1.9862411 3.85263443 4.80663466 4.04329276 3.32388091
|
|
4.14584541 4.38606906 4.34706616 3.77794051]
|
|
|
|
mean value: 3.6466922521591187
|
|
|
|
key: score_time
|
|
value: [0.01326275 0.01297712 0.02374196 0.01897097 0.01499271 0.01290107
|
|
0.01486325 0.01285863 0.02248549 0.0127511 ]
|
|
|
|
mean value: 0.01598050594329834
|
|
|
|
key: test_mcc
|
|
value: [0.93959184 0.87877551 0.86023767 0.90069541 0.96039208 0.83898714
|
|
0.91990261 0.80829204 0.87755102 0.93897107]
|
|
|
|
mean value: 0.892339638572123
|
|
|
|
key: train_mcc
|
|
value: [0.96627644 0.96851354 0.9977528 0.99551061 0.99775281 0.99327351
|
|
1. 0.99775281 0.99551567 0.99326093]
|
|
|
|
mean value: 0.9905609123642101
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.93939394 0.92929293 0.94949495 0.97979798 0.91919192
|
|
0.95959596 0.8989899 0.93877551 0.96938776]
|
|
|
|
mean value: 0.9453617810760668
|
|
|
|
key: train_accuracy
|
|
value: [0.98312711 0.98425197 0.99887514 0.99775028 0.99887514 0.99662542
|
|
1. 0.99887514 0.99775281 0.99662921]
|
|
|
|
mean value: 0.9952762224946601
|
|
|
|
key: test_fscore
|
|
value: [0.96969697 0.93877551 0.93069307 0.95049505 0.97959184 0.92156863
|
|
0.96078431 0.90740741 0.93877551 0.96907216]
|
|
|
|
mean value: 0.946686045918404
|
|
|
|
key: train_fscore
|
|
value: [0.98308906 0.98430493 0.99887767 0.99775785 0.99887514 0.996633
|
|
1. 0.99887514 0.99775785 0.99662542]
|
|
|
|
mean value: 0.9952796057278696
|
|
|
|
key: test_precision
|
|
value: [0.96 0.93877551 0.90384615 0.92307692 1. 0.90384615
|
|
0.94230769 0.84482759 0.93877551 0.97916667]
|
|
|
|
mean value: 0.933462219635865
|
|
|
|
key: train_precision
|
|
value: [0.98642534 0.98210291 0.99775785 0.99552573 0.99775281 0.99328859
|
|
1. 0.99775281 0.99552573 0.99774775]
|
|
|
|
mean value: 0.9943879505645559
|
|
|
|
key: test_recall
|
|
value: [0.97959184 0.93877551 0.95918367 0.97959184 0.96 0.94
|
|
0.98 0.98 0.93877551 0.95918367]
|
|
|
|
mean value: 0.9615102040816327
|
|
|
|
key: train_recall
|
|
value: [0.97977528 0.98651685 1. 1. 1. 1.
|
|
1. 1. 1. 0.99550562]
|
|
|
|
mean value: 0.9961797752808988
|
|
|
|
key: test_roc_auc
|
|
value: [0.96979592 0.93938776 0.92959184 0.94979592 0.98 0.91897959
|
|
0.95938776 0.89816327 0.93877551 0.96938776]
|
|
|
|
mean value: 0.9453265306122449
|
|
|
|
key: train_roc_auc
|
|
value: [0.98313088 0.98424942 0.99887387 0.99774775 0.9988764 0.99662921
|
|
1. 0.9988764 0.99775281 0.99662921]
|
|
|
|
mean value: 0.9952765968215407
|
|
|
|
key: test_jcc
|
|
value: [0.94117647 0.88461538 0.87037037 0.90566038 0.96 0.85454545
|
|
0.9245283 0.83050847 0.88461538 0.94 ]
|
|
|
|
mean value: 0.8996020218556383
|
|
|
|
key: train_jcc
|
|
value: [0.96674058 0.96909492 0.99775785 0.99552573 0.99775281 0.99328859
|
|
1. 0.99775281 0.99552573 0.99327354]
|
|
|
|
mean value: 0.9906712552088768
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07483268 0.08088851 0.09625077 0.06065893 0.06522965 0.08098173
|
|
0.05914283 0.07533932 0.07977986 0.08172202]
|
|
|
|
mean value: 0.07548263072967529
|
|
|
|
key: score_time
|
|
value: [0.00974655 0.00964212 0.01238799 0.00933719 0.00959468 0.00909543
|
|
0.00903368 0.0092895 0.00946617 0.00930023]
|
|
|
|
mean value: 0.00968935489654541
|
|
|
|
key: test_mcc
|
|
value: [0.86023767 0.80041656 0.87954274 0.91996741 0.89918367 0.84441196
|
|
0.90057555 0.80365293 0.81649658 0.8996469 ]
|
|
|
|
mean value: 0.8624131972755258
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92929293 0.8989899 0.93939394 0.95959596 0.94949495 0.91919192
|
|
0.94949495 0.8989899 0.90816327 0.94897959]
|
|
|
|
mean value: 0.9301587301587302
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93069307 0.89361702 0.94 0.96 0.94949495 0.9245283
|
|
0.95145631 0.90566038 0.90721649 0.95049505]
|
|
|
|
mean value: 0.9313161574353681
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.90384615 0.93333333 0.92156863 0.94117647 0.95918367 0.875
|
|
0.9245283 0.85714286 0.91666667 0.92307692]
|
|
|
|
mean value: 0.915552300746133
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.95918367 0.85714286 0.95918367 0.97959184 0.94 0.98
|
|
0.98 0.96 0.89795918 0.97959184]
|
|
|
|
mean value: 0.949265306122449
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.92959184 0.89857143 0.93959184 0.95979592 0.94959184 0.91857143
|
|
0.94918367 0.89836735 0.90816327 0.94897959]
|
|
|
|
mean value: 0.9300408163265307
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.87037037 0.80769231 0.88679245 0.92307692 0.90384615 0.85964912
|
|
0.90740741 0.82758621 0.83018868 0.90566038]
|
|
|
|
mean value: 0.8722270001530694
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.16283011 0.16077089 0.15987325 0.16237903 0.15924025 0.16111422
|
|
0.16178083 0.16142702 0.17704821 0.15939021]
|
|
|
|
mean value: 0.16258540153503417
|
|
|
|
key: score_time
|
|
value: [0.01860547 0.01890182 0.0190649 0.01875401 0.01879978 0.01877117
|
|
0.0189867 0.01962185 0.01872444 0.02043891]
|
|
|
|
mean value: 0.019066905975341795
|
|
|
|
key: test_mcc
|
|
value: [0.94115314 0.91990261 0.87954274 0.88156478 0.93959184 0.93959184
|
|
0.94108303 0.92213889 0.88420483 0.95918367]
|
|
|
|
mean value: 0.9207957376894903
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.95959596 0.93939394 0.93939394 0.96969697 0.96969697
|
|
0.96969697 0.95959596 0.93877551 0.97959184]
|
|
|
|
mean value: 0.9595135023706453
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97029703 0.95833333 0.94 0.93617021 0.96969697 0.96969697
|
|
0.97087379 0.96153846 0.93478261 0.97959184]
|
|
|
|
mean value: 0.9590981208572775
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94230769 0.9787234 0.92156863 0.97777778 0.97959184 0.97959184
|
|
0.94339623 0.92592593 1. 0.97959184]
|
|
|
|
mean value: 0.9628475164336872
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.93877551 0.95918367 0.89795918 0.96 0.96
|
|
1. 1. 0.87755102 0.97959184]
|
|
|
|
mean value: 0.9573061224489796
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97 0.95938776 0.93959184 0.93897959 0.96979592 0.96979592
|
|
0.96938776 0.95918367 0.93877551 0.97959184]
|
|
|
|
mean value: 0.9594489795918367
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94230769 0.92 0.88679245 0.88 0.94117647 0.94117647
|
|
0.94339623 0.92592593 0.87755102 0.96 ]
|
|
|
|
mean value: 0.9218326259063535
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.53
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01317215 0.01331186 0.01284027 0.01374102 0.01220703 0.01224375
|
|
0.01214552 0.01211143 0.01219511 0.01213479]
|
|
|
|
mean value: 0.012610292434692383
|
|
|
|
key: score_time
|
|
value: [0.00985193 0.00912452 0.01000929 0.00967026 0.00914598 0.00912023
|
|
0.00914288 0.00927973 0.00913262 0.00920796]
|
|
|
|
mean value: 0.009368538856506348
|
|
|
|
key: test_mcc
|
|
value: [0.73276862 0.51532527 0.72280632 0.62184172 0.67673469 0.64149061
|
|
0.83174644 0.73755102 0.75763064 0.65428866]
|
|
|
|
mean value: 0.6892184002070045
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.85858586 0.75757576 0.85858586 0.80808081 0.83838384 0.81818182
|
|
0.90909091 0.86868687 0.87755102 0.82653061]
|
|
|
|
mean value: 0.8421253349824779
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.87037037 0.75 0.86538462 0.79120879 0.84 0.80851064
|
|
0.91743119 0.86868687 0.87234043 0.83168317]
|
|
|
|
mean value: 0.8415616070457814
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.79661017 0.76595745 0.81818182 0.85714286 0.84 0.86363636
|
|
0.84745763 0.87755102 0.91111111 0.80769231]
|
|
|
|
mean value: 0.8385340721591301
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.95918367 0.73469388 0.91836735 0.73469388 0.84 0.76
|
|
1. 0.86 0.83673469 0.85714286]
|
|
|
|
mean value: 0.8500816326530612
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.85959184 0.75734694 0.85918367 0.80734694 0.83836735 0.81877551
|
|
0.90816327 0.86877551 0.87755102 0.82653061]
|
|
|
|
mean value: 0.8421632653061224
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.7704918 0.6 0.76271186 0.65454545 0.72413793 0.67857143
|
|
0.84745763 0.76785714 0.77358491 0.71186441]
|
|
|
|
mean value: 0.729122256425266
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [3.29178929 3.24830437 3.17507315 3.24564123 3.29587531 3.29325771
|
|
3.2964592 3.179919 3.31195903 3.16840601]
|
|
|
|
mean value: 3.250668430328369
|
|
|
|
key: score_time
|
|
value: [0.1065352 0.10481977 0.10572243 0.09955859 0.10644341 0.10679841
|
|
0.10662079 0.09801412 0.10459375 0.10144281]
|
|
|
|
mean value: 0.10405492782592773
|
|
|
|
key: test_mcc
|
|
value: [0.93959184 0.90057555 0.93959184 0.9035079 0.96039208 0.91990261
|
|
0.96036035 0.96036035 0.89814624 0.9797959 ]
|
|
|
|
mean value: 0.9362224638195957
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.94949495 0.96969697 0.94949495 0.97979798 0.95959596
|
|
0.97979798 0.97979798 0.94897959 0.98979592]
|
|
|
|
mean value: 0.9676149247577819
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96969697 0.94736842 0.96969697 0.94623656 0.97959184 0.96078431
|
|
0.98039216 0.98039216 0.94845361 0.98969072]
|
|
|
|
mean value: 0.9672303713668937
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96 0.97826087 0.96 1. 1. 0.94230769
|
|
0.96153846 0.96153846 0.95833333 1. ]
|
|
|
|
mean value: 0.9721978818283166
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.97959184 0.91836735 0.97959184 0.89795918 0.96 0.98
|
|
1. 1. 0.93877551 0.97959184]
|
|
|
|
mean value: 0.9633877551020408
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96979592 0.94918367 0.96979592 0.94897959 0.98 0.95938776
|
|
0.97959184 0.97959184 0.94897959 0.98979592]
|
|
|
|
mean value: 0.9675102040816327
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94117647 0.9 0.94117647 0.89795918 0.96 0.9245283
|
|
0.96153846 0.96153846 0.90196078 0.97959184]
|
|
|
|
mean value: 0.9369469970862074
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.12859726 1.17600107 1.12439513 1.14297009 1.17038274 1.15913725
|
|
1.12060213 1.15354204 1.12537003 1.16443944]
|
|
|
|
mean value: 1.1465437173843385
|
|
|
|
key: score_time
|
|
value: [0.26860595 0.22630024 0.2106359 0.28379321 0.27383327 0.23719215
|
|
0.26290917 0.2687254 0.28648877 0.27320886]
|
|
|
|
mean value: 0.2591692924499512
|
|
|
|
key: test_mcc
|
|
value: [0.94115314 0.88156478 0.87954274 0.88156478 0.96039208 0.91990261
|
|
0.94108303 0.94108303 0.89814624 0.95918367]
|
|
|
|
mean value: 0.920361611010916
|
|
|
|
key: train_mcc
|
|
value: [0.97750278 0.98200223 0.98201226 0.97525559 0.97975504 0.97975504
|
|
0.97975493 0.97750278 0.98427215 0.97080858]
|
|
|
|
mean value: 0.978862137703153
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.93939394 0.93939394 0.93939394 0.97979798 0.95959596
|
|
0.96969697 0.96969697 0.94897959 0.97959184]
|
|
|
|
mean value: 0.9595238095238096
|
|
|
|
key: train_accuracy
|
|
value: [0.98875141 0.99100112 0.99100112 0.98762655 0.98987627 0.98987627
|
|
0.98987627 0.98875141 0.99213483 0.98539326]
|
|
|
|
mean value: 0.9894288494837022
|
|
|
|
key: test_fscore
|
|
value: [0.97029703 0.93617021 0.94 0.93617021 0.97959184 0.96078431
|
|
0.97087379 0.97087379 0.94845361 0.97959184]
|
|
|
|
mean value: 0.959280662349272
|
|
|
|
key: train_fscore
|
|
value: [0.98876404 0.99101124 0.99099099 0.98762655 0.98987627 0.98987627
|
|
0.98985344 0.98873874 0.99214366 0.98534386]
|
|
|
|
mean value: 0.9894225041304512
|
|
|
|
key: test_precision
|
|
value: [0.94230769 0.97777778 0.92156863 0.97777778 1. 0.94230769
|
|
0.94339623 0.94339623 0.95833333 0.97959184]
|
|
|
|
mean value: 0.9586457190520137
|
|
|
|
key: train_precision
|
|
value: [0.98876404 0.99101124 0.99322799 0.98873874 0.98876404 0.98876404
|
|
0.99097065 0.98873874 0.99103139 0.98868778]
|
|
|
|
mean value: 0.9898698666802147
|
|
|
|
key: test_recall
|
|
value: [1. 0.89795918 0.95918367 0.89795918 0.96 0.98
|
|
1. 1. 0.93877551 0.97959184]
|
|
|
|
mean value: 0.9613469387755103
|
|
|
|
key: train_recall
|
|
value: [0.98876404 0.99101124 0.98876404 0.98651685 0.99099099 0.99099099
|
|
0.98873874 0.98873874 0.99325843 0.98202247]
|
|
|
|
mean value: 0.9889796538111145
|
|
|
|
key: test_roc_auc
|
|
value: [0.97 0.93897959 0.93959184 0.93897959 0.98 0.95938776
|
|
0.96938776 0.96938776 0.94897959 0.97959184]
|
|
|
|
mean value: 0.9594285714285715
|
|
|
|
key: train_roc_auc
|
|
value: [0.98875139 0.99100111 0.99100364 0.9876278 0.98987752 0.98987752
|
|
0.98987499 0.98875139 0.99213483 0.98539326]
|
|
|
|
mean value: 0.9894293450754125
|
|
|
|
key: test_jcc
|
|
value: [0.94230769 0.88 0.88679245 0.88 0.96 0.9245283
|
|
0.94339623 0.94339623 0.90196078 0.96 ]
|
|
|
|
mean value: 0.9222381684168588
|
|
|
|
key: train_jcc
|
|
value: [0.97777778 0.98218263 0.98214286 0.97555556 0.97995546 0.97995546
|
|
0.97991071 0.97772829 0.9844098 0.97111111]
|
|
|
|
mean value: 0.9790729641708205
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01362324 0.0164392 0.01667666 0.01691866 0.01657128 0.01652837
|
|
0.01677561 0.01668191 0.01984668 0.01651978]
|
|
|
|
mean value: 0.0166581392288208
|
|
|
|
key: score_time
|
|
value: [0.01225281 0.0123353 0.01265836 0.01251817 0.01250219 0.01250148
|
|
0.01241779 0.01244569 0.01448178 0.0124495 ]
|
|
|
|
mean value: 0.012656307220458985
|
|
|
|
key: test_mcc
|
|
value: [0.6862556 0.61967734 0.68214847 0.61616992 0.68137582 0.58471775
|
|
0.74940354 0.72626138 0.73607474 0.6261586 ]
|
|
|
|
mean value: 0.6708243147038132
|
|
|
|
key: train_mcc
|
|
value: [0.68515739 0.70883768 0.68152428 0.69917607 0.6912205 0.69511939
|
|
0.68367001 0.68730629 0.6851191 0.69257007]
|
|
|
|
mean value: 0.6909700772648195
|
|
|
|
key: test_accuracy
|
|
value: [0.83838384 0.80808081 0.83838384 0.80808081 0.83838384 0.78787879
|
|
0.86868687 0.85858586 0.86734694 0.79591837]
|
|
|
|
mean value: 0.8309729952587095
|
|
|
|
key: train_accuracy
|
|
value: [0.83914511 0.8503937 0.83689539 0.84701912 0.84139483 0.8447694
|
|
0.83802025 0.84026997 0.83932584 0.84382022]
|
|
|
|
mean value: 0.8421053828945539
|
|
|
|
key: test_fscore
|
|
value: [0.8490566 0.81553398 0.84615385 0.80412371 0.8490566 0.80733945
|
|
0.88073394 0.87037037 0.87128713 0.8245614 ]
|
|
|
|
mean value: 0.8418217042711172
|
|
|
|
key: train_fscore
|
|
value: [0.84994753 0.86102403 0.84848485 0.8559322 0.85266458 0.85381356
|
|
0.8490566 0.85052632 0.84963197 0.85259809]
|
|
|
|
mean value: 0.8523679732652452
|
|
|
|
key: test_precision
|
|
value: [0.78947368 0.77777778 0.8 0.8125 0.80357143 0.74576271
|
|
0.81355932 0.81034483 0.84615385 0.72307692]
|
|
|
|
mean value: 0.7922220521275014
|
|
|
|
key: train_precision
|
|
value: [0.79724409 0.8046875 0.79296875 0.80961924 0.79532164 0.806
|
|
0.79411765 0.79841897 0.79841897 0.80722892]
|
|
|
|
mean value: 0.800402572777755
|
|
|
|
key: test_recall
|
|
value: [0.91836735 0.85714286 0.89795918 0.79591837 0.9 0.88
|
|
0.96 0.94 0.89795918 0.95918367]
|
|
|
|
mean value: 0.9006530612244897
|
|
|
|
key: train_recall
|
|
value: [0.91011236 0.9258427 0.91235955 0.90786517 0.91891892 0.90765766
|
|
0.91216216 0.90990991 0.90786517 0.90337079]
|
|
|
|
mean value: 0.9116064378985728
|
|
|
|
key: test_roc_auc
|
|
value: [0.83918367 0.80857143 0.83897959 0.80795918 0.8377551 0.78693878
|
|
0.8677551 0.8577551 0.86734694 0.79591837]
|
|
|
|
mean value: 0.8308163265306122
|
|
|
|
key: train_roc_auc
|
|
value: [0.83906519 0.85030874 0.83681041 0.8469506 0.84148193 0.84484006
|
|
0.83810355 0.84034821 0.83932584 0.84382022]
|
|
|
|
mean value: 0.8421054762627797
|
|
|
|
key: test_jcc
|
|
value: [0.73770492 0.68852459 0.73333333 0.67241379 0.73770492 0.67692308
|
|
0.78688525 0.7704918 0.77192982 0.70149254]
|
|
|
|
mean value: 0.7277404040644531
|
|
|
|
key: train_jcc
|
|
value: [0.73905109 0.7559633 0.73684211 0.74814815 0.7431694 0.74491682
|
|
0.73770492 0.73992674 0.73857404 0.74306839]
|
|
|
|
mean value: 0.7427364960709436
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.17445803 0.1532166 0.15878916 0.18085885 0.14759994 0.14799905
|
|
0.14707088 0.87270045 0.16591811 0.1497848 ]
|
|
|
|
mean value: 0.22983958721160888
|
|
|
|
key: score_time
|
|
value: [0.01138997 0.01183224 0.01144814 0.01292729 0.01132655 0.01141787
|
|
0.06421971 0.01267552 0.01211953 0.01139641]
|
|
|
|
mean value: 0.017075324058532716
|
|
|
|
key: test_mcc
|
|
value: [0.93959184 0.89914258 0.91996741 0.97999192 0.93959184 0.86710997
|
|
0.94108303 0.98 0.91836735 0.9797959 ]
|
|
|
|
mean value: 0.9364641826900267
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.94949495 0.95959596 0.98989899 0.96969697 0.92929293
|
|
0.96969697 0.98989899 0.95918367 0.98979592]
|
|
|
|
mean value: 0.9676252319109462
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96969697 0.94845361 0.96 0.98969072 0.96969697 0.93457944
|
|
0.97087379 0.98989899 0.95918367 0.98989899]
|
|
|
|
mean value: 0.9681973148218318
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96 0.95833333 0.94117647 1. 0.97959184 0.87719298
|
|
0.94339623 1. 0.95918367 0.98 ]
|
|
|
|
mean value: 0.9598874522996885
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.97959184 0.93877551 0.97959184 0.97959184 0.96 1.
|
|
1. 0.98 0.95918367 1. ]
|
|
|
|
mean value: 0.9776734693877551
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96979592 0.94938776 0.95979592 0.98979592 0.96979592 0.92857143
|
|
0.96938776 0.99 0.95918367 0.98979592]
|
|
|
|
mean value: 0.9675510204081632
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94117647 0.90196078 0.92307692 0.97959184 0.94117647 0.87719298
|
|
0.94339623 0.98 0.92156863 0.98 ]
|
|
|
|
mean value: 0.9389140321624028
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.068645 0.07589388 0.07109332 0.080374 0.06090236 0.09334278
|
|
0.07661915 0.09116554 0.1012013 0.06702113]
|
|
|
|
mean value: 0.07862584590911866
|
|
|
|
key: score_time
|
|
value: [0.0205996 0.0126493 0.01983166 0.01257801 0.01251626 0.02523398
|
|
0.01243401 0.02088189 0.01987362 0.01254177]
|
|
|
|
mean value: 0.016914010047912598
|
|
|
|
key: test_mcc
|
|
value: [0.90369611 0.83836735 0.82670854 0.88175388 0.87944488 0.76289389
|
|
0.86285005 0.81441102 0.89814624 0.84307902]
|
|
|
|
mean value: 0.8511350982992749
|
|
|
|
key: train_mcc
|
|
value: [0.92384388 0.94844967 0.94174412 0.92615295 0.93516022 0.93966144
|
|
0.930659 0.93054554 0.93086606 0.93318766]
|
|
|
|
mean value: 0.9340270545570946
|
|
|
|
key: test_accuracy
|
|
value: [0.94949495 0.91919192 0.90909091 0.93939394 0.93939394 0.87878788
|
|
0.92929293 0.8989899 0.94897959 0.91836735]
|
|
|
|
mean value: 0.9230983302411874
|
|
|
|
key: train_accuracy
|
|
value: [0.96175478 0.97412823 0.97075366 0.96287964 0.96737908 0.9696288
|
|
0.96512936 0.96512936 0.96516854 0.96629213]
|
|
|
|
mean value: 0.9668243576294536
|
|
|
|
key: test_fscore
|
|
value: [0.95145631 0.91836735 0.91428571 0.94117647 0.94117647 0.88679245
|
|
0.93333333 0.90909091 0.94949495 0.92307692]
|
|
|
|
mean value: 0.9268250880906875
|
|
|
|
key: train_fscore
|
|
value: [0.96230599 0.97441602 0.97111111 0.96345515 0.96781354 0.9700333
|
|
0.96559378 0.96551724 0.96574586 0.96688742]
|
|
|
|
mean value: 0.9672879401589631
|
|
|
|
key: test_precision
|
|
value: [0.90740741 0.91836735 0.85714286 0.90566038 0.92307692 0.83928571
|
|
0.89090909 0.83333333 0.94 0.87272727]
|
|
|
|
mean value: 0.8887910323179865
|
|
|
|
key: train_precision
|
|
value: [0.94967177 0.96475771 0.96043956 0.94978166 0.95404814 0.95623632
|
|
0.95185996 0.95384615 0.95 0.95010846]
|
|
|
|
mean value: 0.9540749735355485
|
|
|
|
key: test_recall
|
|
value: [1. 0.91836735 0.97959184 0.97959184 0.96 0.94
|
|
0.98 1. 0.95918367 0.97959184]
|
|
|
|
mean value: 0.9696326530612245
|
|
|
|
key: train_recall
|
|
value: [0.9752809 0.98426966 0.98202247 0.97752809 0.98198198 0.98423423
|
|
0.97972973 0.97747748 0.98202247 0.98426966]
|
|
|
|
mean value: 0.980881668185039
|
|
|
|
key: test_roc_auc
|
|
value: [0.95 0.91918367 0.90979592 0.93979592 0.93918367 0.87816327
|
|
0.92877551 0.89795918 0.94897959 0.91836735]
|
|
|
|
mean value: 0.9230204081632654
|
|
|
|
key: train_roc_auc
|
|
value: [0.96173955 0.97411681 0.97074097 0.96286314 0.96739549 0.96964521
|
|
0.96514576 0.96514323 0.96516854 0.96629213]
|
|
|
|
mean value: 0.9668250835104768
|
|
|
|
key: test_jcc
|
|
value: [0.90740741 0.8490566 0.84210526 0.88888889 0.88888889 0.79661017
|
|
0.875 0.83333333 0.90384615 0.85714286]
|
|
|
|
mean value: 0.8642279565930534
|
|
|
|
key: train_jcc
|
|
value: [0.92735043 0.95010846 0.94384449 0.92948718 0.93763441 0.94181034
|
|
0.93347639 0.93333333 0.93376068 0.93589744]
|
|
|
|
mean value: 0.9366703160419035
|
|
|
|
MCC on Blind test: 0.57
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02109385 0.01585078 0.01574206 0.0259254 0.02016664 0.01577473
|
|
0.01604772 0.01927948 0.01565814 0.01562071]
|
|
|
|
mean value: 0.018115949630737305
|
|
|
|
key: score_time
|
|
value: [0.01251531 0.01226664 0.01221132 0.01238561 0.01231337 0.01591039
|
|
0.0158217 0.01335621 0.01224566 0.01215696]
|
|
|
|
mean value: 0.013118314743041991
|
|
|
|
key: test_mcc
|
|
value: [0.55769399 0.66025145 0.57910322 0.49692935 0.55614541 0.64061678
|
|
0.59624074 0.55578301 0.71667764 0.61545745]
|
|
|
|
mean value: 0.5974899038184818
|
|
|
|
key: train_mcc
|
|
value: [0.59771984 0.61815407 0.62676332 0.59589485 0.62748409 0.63683548
|
|
0.62313841 0.60445671 0.63781946 0.61860262]
|
|
|
|
mean value: 0.6186868858081555
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.82828283 0.78787879 0.74747475 0.77777778 0.81818182
|
|
0.7979798 0.77777778 0.85714286 0.80612245]
|
|
|
|
mean value: 0.7976396619253763
|
|
|
|
key: train_accuracy
|
|
value: [0.79865017 0.8087739 0.81327334 0.79752531 0.81327334 0.81777278
|
|
0.81102362 0.80202475 0.81797753 0.80898876]
|
|
|
|
mean value: 0.8089283502483537
|
|
|
|
key: test_fscore
|
|
value: [0.78431373 0.83495146 0.7961165 0.7311828 0.7755102 0.83018868
|
|
0.80392157 0.78431373 0.8627451 0.81553398]
|
|
|
|
mean value: 0.8018777738420472
|
|
|
|
key: train_fscore
|
|
value: [0.80264609 0.81318681 0.81596452 0.80306346 0.81798246 0.8231441
|
|
0.81619256 0.80530973 0.82467532 0.81318681]
|
|
|
|
mean value: 0.8135351873290931
|
|
|
|
key: test_precision
|
|
value: [0.75471698 0.7962963 0.75925926 0.77272727 0.79166667 0.78571429
|
|
0.78846154 0.76923077 0.83018868 0.77777778]
|
|
|
|
mean value: 0.7826039526511225
|
|
|
|
key: train_precision
|
|
value: [0.78787879 0.79569892 0.80525164 0.78251599 0.79700855 0.79872881
|
|
0.79361702 0.79130435 0.7954071 0.79569892]
|
|
|
|
mean value: 0.7943110097741861
|
|
|
|
key: test_recall
|
|
value: [0.81632653 0.87755102 0.83673469 0.69387755 0.76 0.88
|
|
0.82 0.8 0.89795918 0.85714286]
|
|
|
|
mean value: 0.8239591836734694
|
|
|
|
key: train_recall
|
|
value: [0.81797753 0.83146067 0.82696629 0.8247191 0.84009009 0.8490991
|
|
0.84009009 0.81981982 0.85617978 0.83146067]
|
|
|
|
mean value: 0.833786314404292
|
|
|
|
key: test_roc_auc
|
|
value: [0.77816327 0.82877551 0.78836735 0.74693878 0.77795918 0.81755102
|
|
0.7977551 0.77755102 0.85714286 0.80612245]
|
|
|
|
mean value: 0.7976326530612246
|
|
|
|
key: train_roc_auc
|
|
value: [0.7986284 0.80874836 0.81325792 0.79749469 0.81330347 0.81780798
|
|
0.81105628 0.80204474 0.81797753 0.80898876]
|
|
|
|
mean value: 0.8089308128353072
|
|
|
|
key: test_jcc
|
|
value: [0.64516129 0.71666667 0.66129032 0.57627119 0.63333333 0.70967742
|
|
0.67213115 0.64516129 0.75862069 0.68852459]
|
|
|
|
mean value: 0.6706837936381413
|
|
|
|
key: train_jcc
|
|
value: [0.67034991 0.68518519 0.68913858 0.67093236 0.69202226 0.69944341
|
|
0.68946396 0.67407407 0.70165746 0.68518519]
|
|
|
|
mean value: 0.6857452378841745
|
|
|
|
MCC on Blind test: 0.49
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02865791 0.02978539 0.0274086 0.0332365 0.03130937 0.0340395
|
|
0.03520131 0.02950144 0.03688049 0.03446531]
|
|
|
|
mean value: 0.032048583030700684
|
|
|
|
key: score_time
|
|
value: [0.01240158 0.01226234 0.01264381 0.0122211 0.01231337 0.01256156
|
|
0.01229358 0.01227355 0.0123105 0.01235223]
|
|
|
|
mean value: 0.012363362312316894
|
|
|
|
key: test_mcc
|
|
value: [0.78535389 0.76801169 0.67265715 0.81511151 0.84099067 0.74940354
|
|
0.84930737 0.83836735 0.87828292 0.79582243]
|
|
|
|
mean value: 0.7993308521602401
|
|
|
|
key: train_mcc
|
|
value: [0.83186151 0.82507168 0.76670468 0.74692302 0.86851887 0.86559467
|
|
0.86764604 0.86334122 0.92405292 0.91319231]
|
|
|
|
mean value: 0.8472906918739715
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 0.87878788 0.82828283 0.8989899 0.91919192 0.86868687
|
|
0.91919192 0.91919192 0.93877551 0.8877551 ]
|
|
|
|
mean value: 0.8947742733457019
|
|
|
|
key: train_accuracy
|
|
value: [0.91338583 0.90663667 0.87289089 0.85939258 0.93138358 0.93025872
|
|
0.93138358 0.93025872 0.96179775 0.95617978]
|
|
|
|
mean value: 0.919356807927099
|
|
|
|
key: test_fscore
|
|
value: [0.87912088 0.88679245 0.8045977 0.90740741 0.92307692 0.88073394
|
|
0.92592593 0.92 0.9375 0.89908257]
|
|
|
|
mean value: 0.8964237803272217
|
|
|
|
key: train_fscore
|
|
value: [0.90844233 0.91398964 0.85641677 0.87636004 0.93503727 0.93376068
|
|
0.93475936 0.92723005 0.96238938 0.95709571]
|
|
|
|
mean value: 0.920548123277851
|
|
|
|
key: test_precision
|
|
value: [0.95238095 0.8245614 0.92105263 0.83050847 0.88888889 0.81355932
|
|
0.86206897 0.92 0.95744681 0.81666667]
|
|
|
|
mean value: 0.8787134113662276
|
|
|
|
key: train_precision
|
|
value: [0.96464646 0.84807692 0.98538012 0.78268551 0.88686869 0.88821138
|
|
0.89002037 0.96813725 0.94771242 0.9375 ]
|
|
|
|
mean value: 0.9099239125833843
|
|
|
|
key: test_recall
|
|
value: [0.81632653 0.95918367 0.71428571 1. 0.96 0.96
|
|
1. 0.92 0.91836735 1. ]
|
|
|
|
mean value: 0.9248163265306122
|
|
|
|
key: train_recall
|
|
value: [0.85842697 0.99101124 0.75730337 0.99550562 0.98873874 0.98423423
|
|
0.98423423 0.88963964 0.97752809 0.97752809]
|
|
|
|
mean value: 0.9404150217633364
|
|
|
|
key: test_roc_auc
|
|
value: [0.88816327 0.87959184 0.82714286 0.9 0.91877551 0.8677551
|
|
0.91836735 0.91918367 0.93877551 0.8877551 ]
|
|
|
|
mean value: 0.8945510204081633
|
|
|
|
key: train_roc_auc
|
|
value: [0.91344772 0.90654165 0.87302105 0.8592393 0.93144802 0.93031936
|
|
0.93144296 0.93021308 0.96179775 0.95617978]
|
|
|
|
mean value: 0.9193650673145055
|
|
|
|
key: test_jcc
|
|
value: [0.78431373 0.79661017 0.67307692 0.83050847 0.85714286 0.78688525
|
|
0.86206897 0.85185185 0.88235294 0.81666667]
|
|
|
|
mean value: 0.8141477820891643
|
|
|
|
key: train_jcc
|
|
value: [0.83224401 0.84160305 0.74888889 0.77992958 0.878 0.8757515
|
|
0.87751004 0.8643326 0.92750533 0.91772152]
|
|
|
|
mean value: 0.8543486525086521
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03747582 0.0418005 0.02520752 0.02890873 0.03687644 0.03223491
|
|
0.03308201 0.03092504 0.03353763 0.03397942]
|
|
|
|
mean value: 0.03340280055999756
|
|
|
|
key: score_time
|
|
value: [0.01226282 0.01240396 0.01225805 0.01241755 0.01229286 0.01230359
|
|
0.01225424 0.01249099 0.02049494 0.01257706]
|
|
|
|
mean value: 0.013175606727600098
|
|
|
|
key: test_mcc
|
|
value: [0.87954274 0.81977994 0.7793222 0.84976777 0.91990261 0.81829325
|
|
0.82254789 0.87877551 0.85875386 0.84811452]
|
|
|
|
mean value: 0.8474800303611816
|
|
|
|
key: train_mcc
|
|
value: [0.91254275 0.92410528 0.90566029 0.81463082 0.89674621 0.93250835
|
|
0.79242772 0.85771125 0.83348555 0.92612569]
|
|
|
|
mean value: 0.8795943925423657
|
|
|
|
key: test_accuracy
|
|
value: [0.93939394 0.90909091 0.88888889 0.91919192 0.95959596 0.90909091
|
|
0.90909091 0.93939394 0.92857143 0.91836735]
|
|
|
|
mean value: 0.9220676149247578
|
|
|
|
key: train_accuracy
|
|
value: [0.95613048 0.96175478 0.95275591 0.90101237 0.94713161 0.96625422
|
|
0.88976378 0.928009 0.91573034 0.96292135]
|
|
|
|
mean value: 0.9381463833874698
|
|
|
|
key: test_fscore
|
|
value: [0.94 0.91089109 0.89108911 0.9245283 0.96078431 0.91089109
|
|
0.90526316 0.94 0.92631579 0.9245283 ]
|
|
|
|
mean value: 0.9234291151996209
|
|
|
|
key: train_fscore
|
|
value: [0.9556314 0.96247241 0.95238095 0.90909091 0.94896851 0.96621622
|
|
0.87871287 0.9255814 0.91268917 0.96337403]
|
|
|
|
mean value: 0.9375117864623228
|
|
|
|
key: test_precision
|
|
value: [0.92156863 0.88461538 0.86538462 0.85964912 0.94230769 0.90196078
|
|
0.95555556 0.94 0.95652174 0.85964912]
|
|
|
|
mean value: 0.9087212644372423
|
|
|
|
key: train_precision
|
|
value: [0.96774194 0.94577007 0.9610984 0.84130019 0.91614256 0.96621622
|
|
0.97527473 0.95673077 0.9468599 0.95175439]
|
|
|
|
mean value: 0.9428889147653975
|
|
|
|
key: test_recall
|
|
value: [0.95918367 0.93877551 0.91836735 1. 0.98 0.92
|
|
0.86 0.94 0.89795918 1. ]
|
|
|
|
mean value: 0.9414285714285714
|
|
|
|
key: train_recall
|
|
value: [0.94382022 0.97977528 0.94382022 0.98876404 0.98423423 0.96621622
|
|
0.79954955 0.8963964 0.88089888 0.9752809 ]
|
|
|
|
mean value: 0.9358755946958194
|
|
|
|
key: test_roc_auc
|
|
value: [0.93959184 0.90938776 0.88918367 0.92 0.95938776 0.90897959
|
|
0.90959184 0.93938776 0.92857143 0.91836735]
|
|
|
|
mean value: 0.9222448979591836
|
|
|
|
key: train_roc_auc
|
|
value: [0.95614435 0.96173449 0.95276597 0.90091355 0.9471733 0.96625418
|
|
0.88966242 0.92797348 0.91573034 0.96292135]
|
|
|
|
mean value: 0.9381273408239701
|
|
|
|
key: test_jcc
|
|
value: [0.88679245 0.83636364 0.80357143 0.85964912 0.9245283 0.83636364
|
|
0.82692308 0.88679245 0.8627451 0.85964912]
|
|
|
|
mean value: 0.8583378329422199
|
|
|
|
key: train_jcc
|
|
value: [0.91503268 0.92765957 0.90909091 0.83333333 0.90289256 0.93464052
|
|
0.78366446 0.86147186 0.83940043 0.92933619]
|
|
|
|
mean value: 0.8836522518825543
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.40846825 0.39222646 0.3957274 0.39534307 0.39278769 0.39440775
|
|
0.38945794 0.38327384 0.38870454 0.39040422]
|
|
|
|
mean value: 0.39308011531829834
|
|
|
|
key: score_time
|
|
value: [0.0171597 0.01704168 0.01784682 0.01693153 0.01880622 0.01620102
|
|
0.01611233 0.01651716 0.01725984 0.01601386]
|
|
|
|
mean value: 0.01698901653289795
|
|
|
|
key: test_mcc
|
|
value: [0.91918367 0.81956057 0.863122 0.92213889 0.91918367 0.885171
|
|
0.85871792 0.82219219 0.83743255 0.91913329]
|
|
|
|
mean value: 0.876583576478788
|
|
|
|
key: train_mcc
|
|
value: [0.95951452 0.97754214 0.98201208 0.96177635 0.97301333 0.96854343
|
|
0.96175726 0.98201226 0.97304354 0.95956025]
|
|
|
|
mean value: 0.9698775149099321
|
|
|
|
key: test_accuracy
|
|
value: [0.95959596 0.90909091 0.92929293 0.95959596 0.95959596 0.93939394
|
|
0.92929293 0.90909091 0.91836735 0.95918367]
|
|
|
|
mean value: 0.9372500515357658
|
|
|
|
key: train_accuracy
|
|
value: [0.97975253 0.98875141 0.99100112 0.98087739 0.98650169 0.98425197
|
|
0.98087739 0.99100112 0.98651685 0.97977528]
|
|
|
|
mean value: 0.9849306758003564
|
|
|
|
key: test_fscore
|
|
value: [0.95918367 0.90526316 0.93203883 0.95744681 0.96 0.94339623
|
|
0.93069307 0.91428571 0.91666667 0.96 ]
|
|
|
|
mean value: 0.9378974151500625
|
|
|
|
key: train_fscore
|
|
value: [0.97982063 0.98881432 0.99103139 0.98096305 0.98651685 0.98430493
|
|
0.98087739 0.99101124 0.98654709 0.97982063]
|
|
|
|
mean value: 0.9849707507477011
|
|
|
|
key: test_precision
|
|
value: [0.95918367 0.93478261 0.88888889 1. 0.96 0.89285714
|
|
0.92156863 0.87272727 0.93617021 0.94117647]
|
|
|
|
mean value: 0.9307354897443517
|
|
|
|
key: train_precision
|
|
value: [0.97762864 0.9844098 0.98881432 0.97767857 0.98430493 0.97991071
|
|
0.97977528 0.98878924 0.98434004 0.97762864]
|
|
|
|
mean value: 0.9823280169680935
|
|
|
|
key: test_recall
|
|
value: [0.95918367 0.87755102 0.97959184 0.91836735 0.96 1.
|
|
0.94 0.96 0.89795918 0.97959184]
|
|
|
|
mean value: 0.9472244897959183
|
|
|
|
key: train_recall
|
|
value: [0.98202247 0.99325843 0.99325843 0.98426966 0.98873874 0.98873874
|
|
0.98198198 0.99324324 0.98876404 0.98202247]
|
|
|
|
mean value: 0.9876298208320681
|
|
|
|
key: test_roc_auc
|
|
value: [0.95959184 0.90877551 0.92979592 0.95918367 0.95959184 0.93877551
|
|
0.92918367 0.90857143 0.91836735 0.95918367]
|
|
|
|
mean value: 0.9371020408163265
|
|
|
|
key: train_roc_auc
|
|
value: [0.97974997 0.98874633 0.99099858 0.98087357 0.9865042 0.98425701
|
|
0.98087863 0.99100364 0.98651685 0.97977528]
|
|
|
|
mean value: 0.9849304079360259
|
|
|
|
key: test_jcc
|
|
value: [0.92156863 0.82692308 0.87272727 0.91836735 0.92307692 0.89285714
|
|
0.87037037 0.84210526 0.84615385 0.92307692]
|
|
|
|
mean value: 0.8837226792733206
|
|
|
|
key: train_jcc
|
|
value: [0.96043956 0.97787611 0.98222222 0.96263736 0.97339246 0.96909492
|
|
0.96247241 0.98218263 0.97345133 0.96043956]
|
|
|
|
mean value: 0.9704208557545047
|
|
|
|
MCC on Blind test: 0.71
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.23611856 0.25187445 0.12465334 0.23985076 0.24130869 0.25520205
|
|
0.14906311 0.13938284 0.24587011 0.23628402]
|
|
|
|
mean value: 0.2119607925415039
|
|
|
|
key: score_time
|
|
value: [0.04111791 0.03314471 0.01907396 0.03047752 0.03397369 0.04021382
|
|
0.03599882 0.0376358 0.02589822 0.0338583 ]
|
|
|
|
mean value: 0.033139276504516604
|
|
|
|
key: test_mcc
|
|
value: [0.90069541 0.88156478 0.90069541 0.97999192 0.89918367 0.9035079
|
|
0.94108303 0.87877551 0.87755102 0.87828292]
|
|
|
|
mean value: 0.9041331568388002
|
|
|
|
key: train_mcc
|
|
value: [0.99327351 0.99551066 1. 0.99550056 0.98877383 0.9910111
|
|
0.99325337 0.9977528 1. 0.98882646]
|
|
|
|
mean value: 0.9943902276771988
|
|
|
|
key: test_accuracy
|
|
value: [0.94949495 0.93939394 0.94949495 0.98989899 0.94949495 0.94949495
|
|
0.96969697 0.93939394 0.93877551 0.93877551]
|
|
|
|
mean value: 0.95139146567718
|
|
|
|
key: train_accuracy
|
|
value: [0.99662542 0.99775028 1. 0.99775028 0.9943757 0.99550056
|
|
0.99662542 0.99887514 1. 0.99438202]
|
|
|
|
mean value: 0.9971884834620391
|
|
|
|
key: test_fscore
|
|
value: [0.95049505 0.93617021 0.95049505 0.98969072 0.94949495 0.95238095
|
|
0.97087379 0.94 0.93877551 0.94 ]
|
|
|
|
mean value: 0.9518376231913094
|
|
|
|
key: train_fscore
|
|
value: [0.99661781 0.99774775 1. 0.99775281 0.99435028 0.99548533
|
|
0.99662542 0.9988726 1. 0.99435028]
|
|
|
|
mean value: 0.997180228798072
|
|
|
|
key: test_precision
|
|
value: [0.92307692 0.97777778 0.92307692 1. 0.95918367 0.90909091
|
|
0.94339623 0.94 0.93877551 0.92156863]
|
|
|
|
mean value: 0.9435946570562077
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 0.99775281 0.99773243 0.99773756
|
|
0.99550562 1. 1. 1. ]
|
|
|
|
mean value: 0.9988728409831233
|
|
|
|
key: test_recall
|
|
value: [0.97959184 0.89795918 0.97959184 0.97959184 0.94 1.
|
|
1. 0.94 0.93877551 0.95918367]
|
|
|
|
mean value: 0.961469387755102
|
|
|
|
key: train_recall
|
|
value: [0.99325843 0.99550562 1. 0.99775281 0.99099099 0.99324324
|
|
0.99774775 0.99774775 1. 0.98876404]
|
|
|
|
mean value: 0.9955010628606135
|
|
|
|
key: test_roc_auc
|
|
value: [0.94979592 0.93897959 0.94979592 0.98979592 0.94959184 0.94897959
|
|
0.96938776 0.93938776 0.93877551 0.93877551]
|
|
|
|
mean value: 0.9513265306122449
|
|
|
|
key: train_roc_auc
|
|
value: [0.99662921 0.99775281 1. 0.99775028 0.9943719 0.99549803
|
|
0.99662668 0.99887387 1. 0.99438202]
|
|
|
|
mean value: 0.997188480615447
|
|
|
|
key: test_jcc
|
|
value: [0.90566038 0.88 0.90566038 0.97959184 0.90384615 0.90909091
|
|
0.94339623 0.88679245 0.88461538 0.88679245]
|
|
|
|
mean value: 0.9085446171079594
|
|
|
|
key: train_jcc
|
|
value: [0.99325843 0.99550562 1. 0.9955157 0.98876404 0.99101124
|
|
0.99327354 0.99774775 1. 0.98876404]
|
|
|
|
mean value: 0.9943840356202426
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.54235721 0.39123535 0.39641261 0.53129673 0.54738426 0.38500595
|
|
0.44280481 0.39725327 0.39342475 0.40136409]
|
|
|
|
mean value: 0.44285390377044676
|
|
|
|
key: score_time
|
|
value: [0.03859544 0.02192593 0.03081608 0.02784944 0.03867173 0.02448344
|
|
0.04415464 0.02167201 0.02157211 0.03136826]
|
|
|
|
mean value: 0.030110907554626466
|
|
|
|
key: test_mcc
|
|
value: [0.90069541 0.81836735 0.78594951 0.69840674 0.86006806 0.89918367
|
|
0.80829204 0.82219219 0.81786082 0.76537164]
|
|
|
|
mean value: 0.8176387421183097
|
|
|
|
key: train_mcc
|
|
value: [0.95731532 0.96859167 0.96404313 0.96404313 0.95966126 0.96187469
|
|
0.96181607 0.96401439 0.96428845 0.96185847]
|
|
|
|
mean value: 0.9627506565215252
|
|
|
|
key: test_accuracy
|
|
value: [0.94949495 0.90909091 0.88888889 0.84848485 0.92929293 0.94949495
|
|
0.8989899 0.90909091 0.90816327 0.87755102]
|
|
|
|
mean value: 0.9068542568542568
|
|
|
|
key: train_accuracy
|
|
value: [0.97862767 0.98425197 0.98200225 0.98200225 0.97975253 0.98087739
|
|
0.98087739 0.98200225 0.98202247 0.98089888]
|
|
|
|
mean value: 0.9813315049102008
|
|
|
|
key: test_fscore
|
|
value: [0.95049505 0.90909091 0.8952381 0.85148515 0.93203883 0.94949495
|
|
0.90740741 0.91428571 0.90526316 0.88679245]
|
|
|
|
mean value: 0.9101591719213259
|
|
|
|
key: train_fscore
|
|
value: [0.97877095 0.984375 0.98210291 0.98210291 0.97991071 0.98100559
|
|
0.98096305 0.98202247 0.98222222 0.98100559]
|
|
|
|
mean value: 0.9814481393790541
|
|
|
|
key: test_precision
|
|
value: [0.92307692 0.9 0.83928571 0.82692308 0.90566038 0.95918367
|
|
0.84482759 0.87272727 0.93478261 0.8245614 ]
|
|
|
|
mean value: 0.8831028636252186
|
|
|
|
key: train_precision
|
|
value: [0.97333333 0.97782705 0.97772829 0.97772829 0.97123894 0.97339246
|
|
0.97550111 0.97982063 0.97142857 0.97555556]
|
|
|
|
mean value: 0.9753554222110018
|
|
|
|
key: test_recall
|
|
value: [0.97959184 0.91836735 0.95918367 0.87755102 0.96 0.94
|
|
0.98 0.96 0.87755102 0.95918367]
|
|
|
|
mean value: 0.9411428571428571
|
|
|
|
key: train_recall
|
|
value: [0.98426966 0.99101124 0.98651685 0.98651685 0.98873874 0.98873874
|
|
0.98648649 0.98423423 0.99325843 0.98651685]
|
|
|
|
mean value: 0.9876288085838648
|
|
|
|
key: test_roc_auc
|
|
value: [0.94979592 0.90918367 0.88959184 0.84877551 0.92897959 0.94959184
|
|
0.89816327 0.90857143 0.90816327 0.87755102]
|
|
|
|
mean value: 0.9068367346938776
|
|
|
|
key: train_roc_auc
|
|
value: [0.97862132 0.98424436 0.98199717 0.98199717 0.97976263 0.98088622
|
|
0.98088369 0.98200476 0.98202247 0.98089888]
|
|
|
|
mean value: 0.9813318655734385
|
|
|
|
key: test_jcc
|
|
value: [0.90566038 0.83333333 0.81034483 0.74137931 0.87272727 0.90384615
|
|
0.83050847 0.84210526 0.82692308 0.79661017]
|
|
|
|
mean value: 0.8363438259345053
|
|
|
|
key: train_jcc
|
|
value: [0.95842451 0.96923077 0.96483516 0.96483516 0.96061269 0.9627193
|
|
0.96263736 0.96467991 0.9650655 0.9627193 ]
|
|
|
|
mean value: 0.9635759671037601
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.74911952 1.72358584 1.70547414 1.72716212 1.70538068 1.71679974
|
|
1.71426439 1.69969511 1.76378655 1.72080326]
|
|
|
|
mean value: 1.722607135772705
|
|
|
|
key: score_time
|
|
value: [0.01020241 0.00962186 0.00990653 0.00976205 0.00992346 0.00996399
|
|
0.00976038 0.00962758 0.01055717 0.00958347]
|
|
|
|
mean value: 0.009890890121459961
|
|
|
|
key: test_mcc
|
|
value: [0.93959184 0.89914258 0.88175388 0.97999192 0.93959184 0.82623193
|
|
0.94108303 0.93956725 0.89814624 0.88048967]
|
|
|
|
mean value: 0.9125590170870845
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 0.99775281 0.9977528 0.99775281
|
|
1. 1. 1. 0.99550562]
|
|
|
|
mean value: 0.9988764033582966
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.94949495 0.93939394 0.98989899 0.96969697 0.90909091
|
|
0.96969697 0.96969697 0.94897959 0.93877551]
|
|
|
|
mean value: 0.9554421768707483
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 0.99887514 0.99887514 0.99887514
|
|
1. 1. 1. 0.99775281]
|
|
|
|
mean value: 0.9994378230811036
|
|
|
|
key: test_fscore
|
|
value: [0.96969697 0.94845361 0.94117647 0.98969072 0.96969697 0.91588785
|
|
0.97087379 0.97029703 0.94845361 0.94117647]
|
|
|
|
mean value: 0.9565403485292767
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 0.99887514 0.9988726 0.99887514
|
|
1. 1. 1. 0.99775281]
|
|
|
|
mean value: 0.9994375694487716
|
|
|
|
key: test_precision
|
|
value: [0.96 0.95833333 0.90566038 1. 0.97959184 0.85964912
|
|
0.94339623 0.96078431 0.95833333 0.90566038]
|
|
|
|
mean value: 0.9431408921065944
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 0.99775281
|
|
1. 1. 1. 0.99775281]
|
|
|
|
mean value: 0.9995505617977528
|
|
|
|
key: test_recall
|
|
value: [0.97959184 0.93877551 0.97959184 0.97959184 0.96 0.98
|
|
1. 0.98 0.93877551 0.97959184]
|
|
|
|
mean value: 0.9715918367346938
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.99775281 0.99774775 1.
|
|
1. 1. 1. 0.99775281]
|
|
|
|
mean value: 0.9993253365725276
|
|
|
|
key: test_roc_auc
|
|
value: [0.96979592 0.94938776 0.93979592 0.98979592 0.96979592 0.90836735
|
|
0.96938776 0.96959184 0.94897959 0.93877551]
|
|
|
|
mean value: 0.9553673469387756
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 0.9988764 0.99887387 0.9988764
|
|
1. 1. 1. 0.99775281]
|
|
|
|
mean value: 0.9994379491851402
|
|
|
|
key: test_jcc
|
|
value: [0.94117647 0.90196078 0.88888889 0.97959184 0.94117647 0.84482759
|
|
0.94339623 0.94230769 0.90196078 0.88888889]
|
|
|
|
mean value: 0.9174175629246076
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 0.99775281 0.99774775 0.99775281
|
|
1. 1. 1. 0.9955157 ]
|
|
|
|
mean value: 0.9988769060792541
|
|
|
|
MCC on Blind test: 0.73
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03966689 0.04542542 0.04395771 0.0456686 0.04324579 0.04735518
|
|
0.04396725 0.04551435 0.04363108 0.06029344]
|
|
|
|
mean value: 0.04587256908416748
|
|
|
|
key: score_time
|
|
value: [0.01312613 0.01294804 0.01454949 0.01306391 0.01294231 0.01490688
|
|
0.01434541 0.01444459 0.01446128 0.02317047]
|
|
|
|
mean value: 0.014795851707458497
|
|
|
|
key: test_mcc
|
|
value: [0.98 1. 0.96039208 0.98 1. 0.97999192
|
|
0.97999192 0.96036035 0.9797959 0.9797959 ]
|
|
|
|
mean value: 0.9800328046991096
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.98989899 1. 0.97979798 0.98989899 1. 0.98989899
|
|
0.98989899 0.97979798 0.98979592 0.98979592]
|
|
|
|
mean value: 0.9898783755926613
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98989899 1. 0.98 0.98989899 1. 0.99009901
|
|
0.99009901 0.98039216 0.98969072 0.98969072]
|
|
|
|
mean value: 0.9899769599761674
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.98 1. 0.96078431 0.98 1. 0.98039216
|
|
0.98039216 0.96153846 1. 1. ]
|
|
|
|
mean value: 0.9843107088989442
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 0.97959184 0.97959184]
|
|
|
|
mean value: 0.9959183673469387
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.99 1. 0.98 0.99 1. 0.98979592
|
|
0.98979592 0.97959184 0.98979592 0.98979592]
|
|
|
|
mean value: 0.9898775510204082
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.98 1. 0.96078431 0.98 1. 0.98039216
|
|
0.98039216 0.96153846 0.97959184 0.97959184]
|
|
|
|
mean value: 0.980229076245883
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03728676 0.04709744 0.04066491 0.03484035 0.01868415 0.01835871
|
|
0.0184226 0.04515076 0.04588985 0.03935552]
|
|
|
|
mean value: 0.03457510471343994
|
|
|
|
key: score_time
|
|
value: [0.01977038 0.01724553 0.01929116 0.02901626 0.01238322 0.01966977
|
|
0.01561284 0.02869391 0.02514434 0.02857709]
|
|
|
|
mean value: 0.021540451049804687
|
|
|
|
key: test_mcc
|
|
value: [0.92226137 0.83836735 0.79130589 0.90069541 0.87877551 0.76727671
|
|
0.885171 0.885171 0.89814624 0.84811452]
|
|
|
|
mean value: 0.8615284995621457
|
|
|
|
key: train_mcc
|
|
value: [0.90625081 0.90897384 0.91327609 0.91347959 0.90043189 0.91799474
|
|
0.90878098 0.90212325 0.90930126 0.91337716]
|
|
|
|
mean value: 0.9093989618366219
|
|
|
|
key: test_accuracy
|
|
value: [0.95959596 0.91919192 0.88888889 0.94949495 0.93939394 0.87878788
|
|
0.93939394 0.93939394 0.94897959 0.91836735]
|
|
|
|
mean value: 0.9281488352916925
|
|
|
|
key: train_accuracy
|
|
value: [0.95275591 0.95388076 0.95613048 0.95613048 0.94938133 0.9583802
|
|
0.95388076 0.95050619 0.95393258 0.95617978]
|
|
|
|
mean value: 0.9541158478785657
|
|
|
|
key: test_fscore
|
|
value: [0.96078431 0.91836735 0.89719626 0.95049505 0.94 0.88888889
|
|
0.94339623 0.94339623 0.94949495 0.9245283 ]
|
|
|
|
mean value: 0.9316547564952279
|
|
|
|
key: train_fscore
|
|
value: [0.95374449 0.9550931 0.9571899 0.95728368 0.95081967 0.95938529
|
|
0.95489549 0.95164835 0.95519126 0.9571899 ]
|
|
|
|
mean value: 0.9552441136701847
|
|
|
|
key: test_precision
|
|
value: [0.9245283 0.91836735 0.82758621 0.92307692 0.94 0.82758621
|
|
0.89285714 0.89285714 0.94 0.85964912]
|
|
|
|
mean value: 0.8946508394216898
|
|
|
|
key: train_precision
|
|
value: [0.93520518 0.93162393 0.93562232 0.93376068 0.92356688 0.93576017
|
|
0.93333333 0.92918455 0.92978723 0.93562232]
|
|
|
|
mean value: 0.9323466601182273
|
|
|
|
key: test_recall
|
|
value: [1. 0.91836735 0.97959184 0.97959184 0.94 0.96
|
|
1. 1. 0.95918367 1. ]
|
|
|
|
mean value: 0.9736734693877551
|
|
|
|
key: train_recall
|
|
value: [0.97303371 0.97977528 0.97977528 0.98202247 0.97972973 0.98423423
|
|
0.97747748 0.97522523 0.98202247 0.97977528]
|
|
|
|
mean value: 0.979307116104869
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 0.91918367 0.88979592 0.94979592 0.93938776 0.87795918
|
|
0.93877551 0.93877551 0.94897959 0.91836735]
|
|
|
|
mean value: 0.9281020408163265
|
|
|
|
key: train_roc_auc
|
|
value: [0.95273307 0.9538516 0.95610386 0.95610133 0.94941543 0.95840925
|
|
0.95390728 0.95053396 0.95393258 0.95617978]
|
|
|
|
mean value: 0.9541168134426562
|
|
|
|
key: test_jcc
|
|
value: [0.9245283 0.8490566 0.81355932 0.90566038 0.88679245 0.8
|
|
0.89285714 0.89285714 0.90384615 0.85964912]
|
|
|
|
mean value: 0.8728806620250412
|
|
|
|
key: train_jcc
|
|
value: [0.91157895 0.91404612 0.91789474 0.91806723 0.90625 0.92194093
|
|
0.91368421 0.90775681 0.91422594 0.91789474]
|
|
|
|
mean value: 0.9143339663172821
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.33705473 0.3472724 0.34438944 0.4375844 0.35150576 0.3412447
|
|
0.340832 0.33433557 0.38620925 0.38141108]
|
|
|
|
mean value: 0.36018393039703367
|
|
|
|
key: score_time
|
|
value: [0.01953578 0.0201416 0.02351403 0.01933432 0.01915622 0.0192275
|
|
0.01921344 0.01930308 0.02181935 0.01911068]
|
|
|
|
mean value: 0.020035600662231444
|
|
|
|
key: test_mcc
|
|
value: [0.92226137 0.83836735 0.79130589 0.90069541 0.89918367 0.76727671
|
|
0.88156478 0.83174644 0.89814624 0.88048967]
|
|
|
|
mean value: 0.8611037528428882
|
|
|
|
key: train_mcc
|
|
value: [0.90625081 0.90897384 0.91327609 0.91347959 0.92628953 0.91799474
|
|
0.93297358 0.93054554 0.92869037 0.92435201]
|
|
|
|
mean value: 0.9202826113963661
|
|
|
|
key: test_accuracy
|
|
value: [0.95959596 0.91919192 0.88888889 0.94949495 0.94949495 0.87878788
|
|
0.93939394 0.90909091 0.94897959 0.93877551]
|
|
|
|
mean value: 0.9281694495980211
|
|
|
|
key: train_accuracy
|
|
value: [0.95275591 0.95388076 0.95613048 0.95613048 0.96287964 0.9583802
|
|
0.96625422 0.96512936 0.96404494 0.96179775]
|
|
|
|
mean value: 0.9597383753997043
|
|
|
|
key: test_fscore
|
|
value: [0.96078431 0.91836735 0.89719626 0.95049505 0.94949495 0.88888889
|
|
0.94230769 0.91743119 0.94949495 0.94117647]
|
|
|
|
mean value: 0.9315637115286726
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./embb_cd_7030.py:136: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_cd_7030.py:139: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.95374449 0.9550931 0.9571899 0.95728368 0.96345515 0.95938529
|
|
0.96674058 0.96551724 0.96467991 0.96255507]
|
|
|
|
mean value: 0.9605644410492047
|
|
|
|
key: test_precision
|
|
value: [0.9245283 0.91836735 0.82758621 0.92307692 0.95918367 0.82758621
|
|
0.90740741 0.84745763 0.94 0.90566038]
|
|
|
|
mean value: 0.8980854071049524
|
|
|
|
key: train_precision
|
|
value: [0.93520518 0.93162393 0.93562232 0.93376068 0.94771242 0.93576017
|
|
0.95196507 0.95384615 0.94793926 0.94384449]
|
|
|
|
mean value: 0.9417279680435185
|
|
|
|
key: test_recall
|
|
value: [1. 0.91836735 0.97959184 0.97959184 0.94 0.96
|
|
0.98 1. 0.95918367 0.97959184]
|
|
|
|
mean value: 0.9696326530612245
|
|
|
|
key: train_recall
|
|
value: [0.97303371 0.97977528 0.97977528 0.98202247 0.97972973 0.98423423
|
|
0.98198198 0.97747748 0.98202247 0.98202247]
|
|
|
|
mean value: 0.9802075108816682
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 0.91918367 0.88979592 0.94979592 0.94959184 0.87795918
|
|
0.93897959 0.90816327 0.94897959 0.93877551]
|
|
|
|
mean value: 0.9281224489795918
|
|
|
|
key: train_roc_auc
|
|
value: [0.95273307 0.9538516 0.95610386 0.95610133 0.96289857 0.95840925
|
|
0.96627189 0.96514323 0.96404494 0.96179775]
|
|
|
|
mean value: 0.9597355501568985
|
|
|
|
key: test_jcc
|
|
value: [0.9245283 0.8490566 0.81355932 0.90566038 0.90384615 0.8
|
|
0.89090909 0.84745763 0.90384615 0.88888889]
|
|
|
|
mean value: 0.8727752519661698
|
|
|
|
key: train_jcc
|
|
value: [0.91157895 0.91404612 0.91789474 0.91806723 0.92948718 0.92194093
|
|
0.93562232 0.93333333 0.93176972 0.92781316]
|
|
|
|
mean value: 0.9241553677678147
|
|
|
|
MCC on Blind test: 0.66
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03950262 0.05310416 0.04243994 0.04210258 0.04227042 0.04284549
|
|
0.04315495 0.04158282 0.04218006 0.04253292]
|
|
|
|
mean value: 0.04317159652709961
|
|
|
|
key: score_time
|
|
value: [0.01518965 0.01538396 0.01587033 0.01518583 0.01496124 0.01507592
|
|
0.01499557 0.01213455 0.01520848 0.01507354]
|
|
|
|
mean value: 0.014907908439636231
|
|
|
|
key: test_mcc
|
|
value: [0.75755102 0.77786858 0.67741941 0.83836735 0.7793222 0.71926834
|
|
0.73755102 0.78153434 0.79658219 0.79858365]
|
|
|
|
mean value: 0.7664048095240629
|
|
|
|
key: train_mcc
|
|
value: [0.83832069 0.83612261 0.8382281 0.8427288 0.80432758 0.84487673
|
|
0.80429105 0.83654272 0.82934751 0.81573858]
|
|
|
|
mean value: 0.8290524369722565
|
|
|
|
key: test_accuracy
|
|
value: [0.87878788 0.88888889 0.83838384 0.91919192 0.88888889 0.85858586
|
|
0.86868687 0.88888889 0.89795918 0.89795918]
|
|
|
|
mean value: 0.8826221397649969
|
|
|
|
key: train_accuracy
|
|
value: [0.91901012 0.91788526 0.91901012 0.92125984 0.90213723 0.9223847
|
|
0.90213723 0.91788526 0.91460674 0.90786517]
|
|
|
|
mean value: 0.9144181696389075
|
|
|
|
key: test_fscore
|
|
value: [0.87755102 0.88659794 0.84 0.91836735 0.88659794 0.86538462
|
|
0.86868687 0.8952381 0.9 0.90196078]
|
|
|
|
mean value: 0.8840384607258903
|
|
|
|
key: train_fscore
|
|
value: [0.92017738 0.91915836 0.92 0.92222222 0.90257559 0.92290503
|
|
0.90169492 0.91951488 0.91536748 0.90807175]
|
|
|
|
mean value: 0.9151687614335074
|
|
|
|
key: test_precision
|
|
value: [0.87755102 0.89583333 0.82352941 0.91836735 0.91489362 0.83333333
|
|
0.87755102 0.85454545 0.88235294 0.86792453]
|
|
|
|
mean value: 0.8745882007231564
|
|
|
|
key: train_precision
|
|
value: [0.90809628 0.90611354 0.90989011 0.91208791 0.89755011 0.91574279
|
|
0.9047619 0.90064795 0.90728477 0.90604027]
|
|
|
|
mean value: 0.906821563392795
|
|
|
|
key: test_recall
|
|
value: [0.87755102 0.87755102 0.85714286 0.91836735 0.86 0.9
|
|
0.86 0.94 0.91836735 0.93877551]
|
|
|
|
mean value: 0.8947755102040816
|
|
|
|
key: train_recall
|
|
value: [0.93258427 0.93258427 0.93033708 0.93258427 0.90765766 0.93018018
|
|
0.89864865 0.93918919 0.92359551 0.91011236]
|
|
|
|
mean value: 0.9237473428484664
|
|
|
|
key: test_roc_auc
|
|
value: [0.87877551 0.88877551 0.83857143 0.91918367 0.88918367 0.85816327
|
|
0.86877551 0.88836735 0.89795918 0.89795918]
|
|
|
|
mean value: 0.8825714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [0.91899484 0.91786871 0.91899737 0.92124709 0.90214344 0.92239346
|
|
0.90213331 0.9179092 0.91460674 0.90786517]
|
|
|
|
mean value: 0.9144159327867193
|
|
|
|
key: test_jcc
|
|
value: [0.78181818 0.7962963 0.72413793 0.8490566 0.7962963 0.76271186
|
|
0.76785714 0.81034483 0.81818182 0.82142857]
|
|
|
|
mean value: 0.7928129533679361
|
|
|
|
key: train_jcc
|
|
value: [0.85215606 0.85040984 0.85185185 0.8556701 0.82244898 0.85684647
|
|
0.82098765 0.85102041 0.84394251 0.83162218]
|
|
|
|
mean value: 0.8436956045335057
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.16511536 1.0343082 1.15223002 1.0444715 1.11826062 1.0268805
|
|
1.1583035 1.02212262 1.15169621 1.01887035]
|
|
|
|
mean value: 1.0892258882522583
|
|
|
|
key: score_time
|
|
value: [0.01472592 0.02150321 0.0154078 0.0154922 0.01539779 0.01517129
|
|
0.01544642 0.0152576 0.01522374 0.01527762]
|
|
|
|
mean value: 0.01589035987854004
|
|
|
|
key: test_mcc
|
|
value: [0.90069541 0.92226137 0.83232122 0.89918367 0.96036035 0.86285005
|
|
0.86710997 0.83174644 0.90267093 0.92144268]
|
|
|
|
mean value: 0.8900642088919748
|
|
|
|
key: train_mcc
|
|
value: [0.94265941 0.95381851 0.95166984 0.94461769 0.95382331 0.94266567
|
|
0.95597592 0.95813297 0.96684156 0.96033651]
|
|
|
|
mean value: 0.9530541396232116
|
|
|
|
key: test_accuracy
|
|
value: [0.94949495 0.95959596 0.90909091 0.94949495 0.97979798 0.92929293
|
|
0.92929293 0.90909091 0.94897959 0.95918367]
|
|
|
|
mean value: 0.9423314780457638
|
|
|
|
key: train_accuracy
|
|
value: [0.97075366 0.97637795 0.97525309 0.97187852 0.97637795 0.97075366
|
|
0.97750281 0.97862767 0.98314607 0.97977528]
|
|
|
|
mean value: 0.976044665765094
|
|
|
|
key: test_fscore
|
|
value: [0.95049505 0.96078431 0.91588785 0.94949495 0.98039216 0.93333333
|
|
0.93457944 0.91743119 0.95145631 0.96078431]
|
|
|
|
mean value: 0.9454638909706747
|
|
|
|
key: train_fscore
|
|
value: [0.97149123 0.97694841 0.97587719 0.97249725 0.97689769 0.97142857
|
|
0.97797357 0.97905182 0.98342541 0.98017621]
|
|
|
|
mean value: 0.9765767353602081
|
|
|
|
key: test_precision
|
|
value: [0.92307692 0.9245283 0.84482759 0.94 0.96153846 0.89090909
|
|
0.87719298 0.84745763 0.90740741 0.9245283 ]
|
|
|
|
mean value: 0.9041466682487149
|
|
|
|
key: train_precision
|
|
value: [0.94860814 0.95493562 0.95289079 0.95258621 0.95483871 0.94849785
|
|
0.95689655 0.95896328 0.9673913 0.96112311]
|
|
|
|
mean value: 0.9556731571465527
|
|
|
|
key: test_recall
|
|
value: [0.97959184 1. 1. 0.95918367 1. 0.98
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9918775510204082
|
|
|
|
key: train_recall
|
|
value: [0.99550562 1. 1. 0.99325843 1. 0.9954955
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9984259540439315
|
|
|
|
key: test_roc_auc
|
|
value: [0.94979592 0.96 0.91 0.94959184 0.97959184 0.92877551
|
|
0.92857143 0.90816327 0.94897959 0.95918367]
|
|
|
|
mean value: 0.942265306122449
|
|
|
|
key: train_roc_auc
|
|
value: [0.97072578 0.97635135 0.97522523 0.97185444 0.97640449 0.97078146
|
|
0.97752809 0.97865169 0.98314607 0.97977528]
|
|
|
|
mean value: 0.9760443870837129
|
|
|
|
key: test_jcc
|
|
value: [0.90566038 0.9245283 0.84482759 0.90384615 0.96153846 0.875
|
|
0.87719298 0.84745763 0.90740741 0.9245283 ]
|
|
|
|
mean value: 0.897198719970578
|
|
|
|
key: train_jcc
|
|
value: [0.9445629 0.95493562 0.95289079 0.94646681 0.95483871 0.94444444
|
|
0.95689655 0.95896328 0.9673913 0.96112311]
|
|
|
|
mean value: 0.954251352709982
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01651382 0.01206088 0.01193905 0.01162362 0.01157784 0.01181126
|
|
0.01189542 0.01192665 0.01168752 0.01170993]
|
|
|
|
mean value: 0.012274599075317383
|
|
|
|
key: score_time
|
|
value: [0.01252508 0.00960469 0.00923085 0.00917745 0.00909686 0.00912166
|
|
0.0091188 0.00911403 0.00912857 0.00913358]
|
|
|
|
mean value: 0.009525156021118164
|
|
|
|
key: test_mcc
|
|
value: [0.64423071 0.4753089 0.51532527 0.55361257 0.70082556 0.5555102
|
|
0.63812011 0.51639778 0.65982888 0.65428866]
|
|
|
|
mean value: 0.5913448652794157
|
|
|
|
key: train_mcc
|
|
value: [0.66279356 0.63179889 0.67254151 0.57386664 0.65395267 0.67889489
|
|
0.65952039 0.60332899 0.61839929 0.63441468]
|
|
|
|
mean value: 0.6389511508593262
|
|
|
|
key: test_accuracy
|
|
value: [0.81818182 0.72727273 0.75757576 0.76767677 0.84848485 0.77777778
|
|
0.81818182 0.75757576 0.82653061 0.82653061]
|
|
|
|
mean value: 0.7925788497217069
|
|
|
|
key: train_accuracy
|
|
value: [0.83127109 0.8143982 0.83577053 0.77727784 0.82677165 0.83914511
|
|
0.82902137 0.79977503 0.80674157 0.81685393]
|
|
|
|
mean value: 0.8177026326765334
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.6746988 0.75 0.72941176 0.84210526 0.78
|
|
0.82692308 0.76923077 0.81318681 0.82105263]
|
|
|
|
mean value: 0.7806609113964107
|
|
|
|
key: train_fscore
|
|
value: [0.82915718 0.80519481 0.83140878 0.74484536 0.8233945 0.83544304
|
|
0.82284382 0.7875895 0.79376499 0.81242808]
|
|
|
|
mean value: 0.8086070038698193
|
|
|
|
key: test_precision
|
|
value: [0.87804878 0.82352941 0.76595745 0.86111111 0.88888889 0.78
|
|
0.7962963 0.74074074 0.88095238 0.84782609]
|
|
|
|
mean value: 0.8263351144006961
|
|
|
|
key: train_precision
|
|
value: [0.84064665 0.84825871 0.85510689 0.87311178 0.83878505 0.85411765
|
|
0.852657 0.83756345 0.85089974 0.83254717]
|
|
|
|
mean value: 0.8483694091713531
|
|
|
|
key: test_recall
|
|
value: [0.73469388 0.57142857 0.73469388 0.63265306 0.8 0.78
|
|
0.86 0.8 0.75510204 0.79591837]
|
|
|
|
mean value: 0.7464489795918368
|
|
|
|
key: train_recall
|
|
value: [0.81797753 0.76629213 0.80898876 0.6494382 0.80855856 0.81756757
|
|
0.79504505 0.74324324 0.74382022 0.79325843]
|
|
|
|
mean value: 0.774418969531329
|
|
|
|
key: test_roc_auc
|
|
value: [0.81734694 0.72571429 0.75734694 0.76632653 0.84897959 0.7777551
|
|
0.8177551 0.75714286 0.82653061 0.82653061]
|
|
|
|
mean value: 0.7921428571428571
|
|
|
|
key: train_roc_auc
|
|
value: [0.83128606 0.81445237 0.83580069 0.7774218 0.82675119 0.83912086
|
|
0.8289832 0.79971151 0.80674157 0.81685393]
|
|
|
|
mean value: 0.8177123190606337
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.50909091 0.6 0.57407407 0.72727273 0.63934426
|
|
0.70491803 0.625 0.68518519 0.69642857]
|
|
|
|
mean value: 0.6427980428800101
|
|
|
|
key: train_jcc
|
|
value: [0.70817121 0.67391304 0.71146245 0.59342916 0.69980507 0.7173913
|
|
0.6990099 0.6496063 0.65805169 0.68410853]
|
|
|
|
mean value: 0.6794948648176973
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.011971 0.01645732 0.01638913 0.01642466 0.01638365 0.01640821
|
|
0.02271938 0.0163722 0.01632929 0.0164125 ]
|
|
|
|
mean value: 0.016586732864379884
|
|
|
|
key: score_time
|
|
value: [0.01203513 0.01229119 0.01231694 0.0123539 0.01233554 0.01238179
|
|
0.01259589 0.01234007 0.01239061 0.01234722]
|
|
|
|
mean value: 0.012338829040527344
|
|
|
|
key: test_mcc
|
|
value: [0.5365758 0.31303661 0.5355102 0.597546 0.57651565 0.47486754
|
|
0.55578301 0.47758063 0.55147997 0.51407258]
|
|
|
|
mean value: 0.5132967995255227
|
|
|
|
key: train_mcc
|
|
value: [0.55070563 0.56153007 0.56203173 0.54608634 0.55026989 0.55005896
|
|
0.5275846 0.54555623 0.55304503 0.54383258]
|
|
|
|
mean value: 0.549070106228705
|
|
|
|
key: test_accuracy
|
|
value: [0.76767677 0.65656566 0.76767677 0.7979798 0.78787879 0.73737374
|
|
0.77777778 0.73737374 0.7755102 0.75510204]
|
|
|
|
mean value: 0.7560915275200989
|
|
|
|
key: train_accuracy
|
|
value: [0.77502812 0.78065242 0.78065242 0.7727784 0.77502812 0.77502812
|
|
0.76377953 0.7727784 0.77640449 0.77191011]
|
|
|
|
mean value: 0.7744040141049785
|
|
|
|
key: test_fscore
|
|
value: [0.77227723 0.64583333 0.76767677 0.78723404 0.7961165 0.74509804
|
|
0.78431373 0.75471698 0.77083333 0.76923077]
|
|
|
|
mean value: 0.7593330724542494
|
|
|
|
key: train_fscore
|
|
value: [0.78070175 0.78405316 0.7864184 0.77802198 0.77777778 0.77426637
|
|
0.76457399 0.77252252 0.77962348 0.77267637]
|
|
|
|
mean value: 0.7770635795528944
|
|
|
|
key: test_precision
|
|
value: [0.75 0.65957447 0.76 0.82222222 0.77358491 0.73076923
|
|
0.76923077 0.71428571 0.78723404 0.72727273]
|
|
|
|
mean value: 0.7494174080079339
|
|
|
|
key: train_precision
|
|
value: [0.76231263 0.77292576 0.76709402 0.76129032 0.76754386 0.7760181
|
|
0.76116071 0.77252252 0.76855895 0.77008929]
|
|
|
|
mean value: 0.7679516171384001
|
|
|
|
key: test_recall
|
|
value: [0.79591837 0.63265306 0.7755102 0.75510204 0.82 0.76
|
|
0.8 0.8 0.75510204 0.81632653]
|
|
|
|
mean value: 0.7710612244897959
|
|
|
|
key: train_recall
|
|
value: [0.8 0.79550562 0.80674157 0.79550562 0.78828829 0.77252252
|
|
0.76801802 0.77252252 0.79101124 0.7752809 ]
|
|
|
|
mean value: 0.7865396295171576
|
|
|
|
key: test_roc_auc
|
|
value: [0.76795918 0.65632653 0.7677551 0.79755102 0.78755102 0.73714286
|
|
0.77755102 0.73673469 0.7755102 0.75510204]
|
|
|
|
mean value: 0.7559183673469387
|
|
|
|
key: train_roc_auc
|
|
value: [0.775 0.78063569 0.78062304 0.77275281 0.77504302 0.77502531
|
|
0.76378429 0.77277812 0.77640449 0.77191011]
|
|
|
|
mean value: 0.7743956878226541
|
|
|
|
key: test_jcc
|
|
value: [0.62903226 0.47692308 0.62295082 0.64912281 0.66129032 0.59375
|
|
0.64516129 0.60606061 0.62711864 0.625 ]
|
|
|
|
mean value: 0.6136409824708896
|
|
|
|
key: train_jcc
|
|
value: [0.64028777 0.64480874 0.64801444 0.63669065 0.63636364 0.63167587
|
|
0.61887477 0.6293578 0.63883848 0.62956204]
|
|
|
|
mean value: 0.635447420260183
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01525736 0.01225066 0.01113534 0.01089811 0.01187563 0.01173234
|
|
0.01077724 0.01155543 0.01171041 0.01186252]
|
|
|
|
mean value: 0.011905503273010255
|
|
|
|
key: score_time
|
|
value: [0.04147673 0.02037382 0.02033448 0.02001977 0.02089453 0.02058625
|
|
0.01613092 0.01428127 0.0149343 0.01493406]
|
|
|
|
mean value: 0.020396614074707033
|
|
|
|
key: test_mcc
|
|
value: [0.76023142 0.863122 0.71597164 0.75029056 0.67960716 0.78153434
|
|
0.66706065 0.72213485 0.84307902 0.76200076]
|
|
|
|
mean value: 0.7545032403327636
|
|
|
|
key: train_mcc
|
|
value: [0.86287456 0.84464874 0.86591046 0.84738931 0.85085914 0.85085839
|
|
0.85493258 0.84791953 0.84900073 0.8481633 ]
|
|
|
|
mean value: 0.8522556740816365
|
|
|
|
key: test_accuracy
|
|
value: [0.87878788 0.92929293 0.83838384 0.86868687 0.82828283 0.88888889
|
|
0.82828283 0.85858586 0.91836735 0.86734694]
|
|
|
|
mean value: 0.8704906204906204
|
|
|
|
key: train_accuracy
|
|
value: [0.928009 0.9167604 0.92913386 0.91901012 0.92125984 0.9223847
|
|
0.92463442 0.92125984 0.92022472 0.91910112]
|
|
|
|
mean value: 0.9221778036172446
|
|
|
|
key: test_fscore
|
|
value: [0.88235294 0.93203883 0.85964912 0.87850467 0.84955752 0.8952381
|
|
0.8440367 0.86792453 0.92307692 0.88288288]
|
|
|
|
mean value: 0.8815262220703528
|
|
|
|
key: train_fscore
|
|
value: [0.93234672 0.92307692 0.93361433 0.92468619 0.92631579 0.92667375
|
|
0.9286475 0.92537313 0.92549843 0.92484342]
|
|
|
|
mean value: 0.9271076191755222
|
|
|
|
key: test_precision
|
|
value: [0.8490566 0.88888889 0.75384615 0.81034483 0.76190476 0.85454545
|
|
0.77966102 0.82142857 0.87272727 0.79032258]
|
|
|
|
mean value: 0.8182726132295208
|
|
|
|
key: train_precision
|
|
value: [0.88023952 0.85880077 0.87896825 0.86497065 0.86956522 0.87726358
|
|
0.88080808 0.87854251 0.86811024 0.86354776]
|
|
|
|
mean value: 0.8720816578728141
|
|
|
|
key: test_recall
|
|
value: [0.91836735 0.97959184 1. 0.95918367 0.96 0.94
|
|
0.92 0.92 0.97959184 1. ]
|
|
|
|
mean value: 0.9576734693877551
|
|
|
|
key: train_recall
|
|
value: [0.99101124 0.99775281 0.99550562 0.99325843 0.99099099 0.98198198
|
|
0.98198198 0.97747748 0.99101124 0.99550562]
|
|
|
|
mean value: 0.9896477376252657
|
|
|
|
key: test_roc_auc
|
|
value: [0.87918367 0.92979592 0.84 0.86959184 0.82693878 0.88836735
|
|
0.82734694 0.85795918 0.91836735 0.86734694]
|
|
|
|
mean value: 0.8704897959183673
|
|
|
|
key: train_roc_auc
|
|
value: [0.92793805 0.9166692 0.92905912 0.91892651 0.92133819 0.92245167
|
|
0.92469886 0.92132301 0.92022472 0.91910112]
|
|
|
|
mean value: 0.9221730438303473
|
|
|
|
key: test_jcc
|
|
value: [0.78947368 0.87272727 0.75384615 0.78333333 0.73846154 0.81034483
|
|
0.73015873 0.76666667 0.85714286 0.79032258]
|
|
|
|
mean value: 0.7892477644778446
|
|
|
|
key: train_jcc
|
|
value: [0.87326733 0.85714286 0.87549407 0.85992218 0.8627451 0.86336634
|
|
0.8667992 0.86111111 0.86132812 0.86019417]
|
|
|
|
mean value: 0.8641370484322746
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05479598 0.05015039 0.04996562 0.05446219 0.04805422 0.04615331
|
|
0.05590534 0.04501343 0.05188608 0.04433513]
|
|
|
|
mean value: 0.05007216930389404
|
|
|
|
key: score_time
|
|
value: [0.02043772 0.01775289 0.02041602 0.02089858 0.01845264 0.01802468
|
|
0.02313566 0.01773548 0.01794934 0.01774383]
|
|
|
|
mean value: 0.019254684448242188
|
|
|
|
key: test_mcc
|
|
value: [0.79852941 0.79852941 0.73755102 0.86006806 0.79869341 0.71761394
|
|
0.83898714 0.70021862 0.91913329 0.77957944]
|
|
|
|
mean value: 0.7948903758483772
|
|
|
|
key: train_mcc
|
|
value: [0.8245995 0.83802004 0.84252881 0.82455332 0.82232177 0.829152
|
|
0.82452171 0.84709378 0.83371629 0.81814487]
|
|
|
|
mean value: 0.8304652103547102
|
|
|
|
key: test_accuracy
|
|
value: [0.8989899 0.8989899 0.86868687 0.92929293 0.8989899 0.85858586
|
|
0.91919192 0.84848485 0.95918367 0.8877551 ]
|
|
|
|
mean value: 0.8968150896722326
|
|
|
|
key: train_accuracy
|
|
value: [0.91226097 0.91901012 0.92125984 0.91226097 0.91113611 0.91451069
|
|
0.91226097 0.92350956 0.91685393 0.90898876]
|
|
|
|
mean value: 0.9152051920476232
|
|
|
|
key: test_fscore
|
|
value: [0.89583333 0.89583333 0.86868687 0.92631579 0.89795918 0.8627451
|
|
0.92156863 0.85714286 0.96 0.89320388]
|
|
|
|
mean value: 0.8979288974628887
|
|
|
|
key: train_fscore
|
|
value: [0.91176471 0.91910112 0.92117117 0.91275168 0.91053228 0.91363636
|
|
0.91216216 0.92290249 0.91704036 0.90805902]
|
|
|
|
mean value: 0.9149121357542604
|
|
|
|
key: test_precision
|
|
value: [0.91489362 0.91489362 0.86 0.95652174 0.91666667 0.84615385
|
|
0.90384615 0.81818182 0.94117647 0.85185185]
|
|
|
|
mean value: 0.892418578046156
|
|
|
|
key: train_precision
|
|
value: [0.91799544 0.91910112 0.92325056 0.90868597 0.91571754 0.92201835
|
|
0.91216216 0.92922374 0.91498881 0.91743119]
|
|
|
|
mean value: 0.9180574902860337
|
|
|
|
key: test_recall
|
|
value: [0.87755102 0.87755102 0.87755102 0.89795918 0.88 0.88
|
|
0.94 0.9 0.97959184 0.93877551]
|
|
|
|
mean value: 0.9048979591836734
|
|
|
|
key: train_recall
|
|
value: [0.90561798 0.91910112 0.91910112 0.91685393 0.90540541 0.90540541
|
|
0.91216216 0.91666667 0.91910112 0.8988764 ]
|
|
|
|
mean value: 0.9118291325032898
|
|
|
|
key: test_roc_auc
|
|
value: [0.89877551 0.89877551 0.86877551 0.92897959 0.89918367 0.85836735
|
|
0.91897959 0.84795918 0.95918367 0.8877551 ]
|
|
|
|
mean value: 0.896673469387755
|
|
|
|
key: train_roc_auc
|
|
value: [0.91226845 0.91901002 0.92126227 0.9122558 0.91112967 0.91450046
|
|
0.91226086 0.92350187 0.91685393 0.90898876]
|
|
|
|
mean value: 0.9152032088268043
|
|
|
|
key: test_jcc
|
|
value: [0.81132075 0.81132075 0.76785714 0.8627451 0.81481481 0.75862069
|
|
0.85454545 0.75 0.92307692 0.80701754]
|
|
|
|
mean value: 0.8161319176282334
|
|
|
|
key: train_jcc
|
|
value: [0.83783784 0.85031185 0.85386221 0.83950617 0.83575884 0.84100418
|
|
0.83850932 0.85684211 0.84679089 0.83160083]
|
|
|
|
mean value: 0.8432024237695408
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [3.51971292 2.06748104 1.48413873 3.4876287 2.68998909 1.79947209
|
|
3.10556483 3.42997026 3.49419332 2.99033737]
|
|
|
|
mean value: 2.8068488359451296
|
|
|
|
key: score_time
|
|
value: [0.01523614 0.01260996 0.01264262 0.01487184 0.01260662 0.01262021
|
|
0.01271057 0.01487207 0.01503444 0.012779 ]
|
|
|
|
mean value: 0.013598346710205078
|
|
|
|
key: test_mcc
|
|
value: [1. 0.96039208 0.84477989 0.94115314 1. 0.92213889
|
|
0.94108303 0.83174644 0.9797959 0.90267093]
|
|
|
|
mean value: 0.9323760302616704
|
|
|
|
key: train_mcc
|
|
value: [0.9977528 0.99551061 0.96400445 1. 0.99775281 0.97977497
|
|
1. 0.99775281 0.99775533 0.99775533]
|
|
|
|
mean value: 0.9928059111101858
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.97979798 0.91919192 0.96969697 1. 0.95959596
|
|
0.96969697 0.90909091 0.98979592 0.94897959]
|
|
|
|
mean value: 0.9645846217274788
|
|
|
|
key: train_accuracy
|
|
value: [0.99887514 0.99775028 0.98200225 1. 0.99887514 0.98987627
|
|
1. 0.99887514 0.9988764 0.9988764 ]
|
|
|
|
mean value: 0.9964007027211486
|
|
|
|
key: test_fscore
|
|
value: [1. 0.98 0.92307692 0.97029703 1. 0.96153846
|
|
0.97087379 0.91743119 0.98989899 0.95145631]
|
|
|
|
mean value: 0.9664572693965274
|
|
|
|
key: train_fscore
|
|
value: [0.99887767 0.99775785 0.98202247 1. 0.99887514 0.98989899
|
|
1. 0.99887514 0.99887767 0.99887767]
|
|
|
|
mean value: 0.996406258719058
|
|
|
|
key: test_precision
|
|
value: [1. 0.96078431 0.87272727 0.94230769 1. 0.92592593
|
|
0.94339623 0.84745763 0.98 0.90740741]
|
|
|
|
mean value: 0.9380006465627527
|
|
|
|
key: train_precision
|
|
value: [0.99775785 0.99552573 0.98202247 1. 0.99775281 0.98657718
|
|
1. 0.99775281 0.99775785 0.99775785]
|
|
|
|
mean value: 0.9952904540765942
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.97959184 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9979591836734694
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.98202247 1. 1. 0.99324324
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9975265715153355
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.98 0.91979592 0.97 1. 0.95918367
|
|
0.96938776 0.90816327 0.98979592 0.94897959]
|
|
|
|
mean value: 0.964530612244898
|
|
|
|
key: train_roc_auc
|
|
value: [0.99887387 0.99774775 0.98200223 1. 0.9988764 0.98988005
|
|
1. 0.9988764 0.9988764 0.9988764 ]
|
|
|
|
mean value: 0.9964009515133111
|
|
|
|
key: test_jcc
|
|
value: [1. 0.96078431 0.85714286 0.94230769 1. 0.92592593
|
|
0.94339623 0.84745763 0.98 0.90740741]
|
|
|
|
mean value: 0.9364422050043111
|
|
|
|
key: train_jcc
|
|
value: [0.99775785 0.99552573 0.96467991 1. 0.99775281 0.98
|
|
1. 0.99775281 0.99775785 0.99775785]
|
|
|
|
mean value: 0.9928984799347556
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.88
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05671525 0.04360437 0.04556775 0.04365325 0.04607248 0.04260731
|
|
0.04048634 0.04349804 0.04317045 0.0438695 ]
|
|
|
|
mean value: 0.044924473762512206
|
|
|
|
key: score_time
|
|
value: [0.00958228 0.00909448 0.00920367 0.00915694 0.00908446 0.00924468
|
|
0.00923967 0.00909996 0.00922346 0.0091207 ]
|
|
|
|
mean value: 0.009205031394958495
|
|
|
|
key: test_mcc
|
|
value: [0.92226137 0.96039208 0.96039208 0.96039208 0.96036035 0.885171
|
|
0.94108303 0.94108303 0.94053994 0.90267093]
|
|
|
|
mean value: 0.9374345888155746
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.95959596 0.97979798 0.97979798 0.97979798 0.97979798 0.93939394
|
|
0.96969697 0.96969697 0.96938776 0.94897959]
|
|
|
|
mean value: 0.9675943104514533
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96078431 0.98 0.98 0.98 0.98039216 0.94339623
|
|
0.97087379 0.97087379 0.97029703 0.95145631]
|
|
|
|
mean value: 0.9688073610201445
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9245283 0.96078431 0.96078431 0.96078431 0.96153846 0.89285714
|
|
0.94339623 0.94339623 0.94230769 0.90740741]
|
|
|
|
mean value: 0.9397784400004157
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 0.98 0.98 0.98 0.97959184 0.93877551
|
|
0.96938776 0.96938776 0.96938776 0.94897959]
|
|
|
|
mean value: 0.9675510204081633
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9245283 0.96078431 0.96078431 0.96078431 0.96153846 0.89285714
|
|
0.94339623 0.94339623 0.94230769 0.90740741]
|
|
|
|
mean value: 0.9397784400004157
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.15230417 0.1487422 0.14839315 0.1485703 0.14730835 0.14846444
|
|
0.14699531 0.14957166 0.14793539 0.14892721]
|
|
|
|
mean value: 0.14872121810913086
|
|
|
|
key: score_time
|
|
value: [0.01895952 0.01872087 0.01877856 0.01866269 0.01871753 0.0188334
|
|
0.01872659 0.01866031 0.01910186 0.01865768]
|
|
|
|
mean value: 0.01878190040588379
|
|
|
|
key: test_mcc
|
|
value: [1. 0.98 0.96039208 1. 1. 1.
|
|
0.97999192 0.97999192 0.9797959 1. ]
|
|
|
|
mean value: 0.9880171804111422
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.98989899 0.97979798 1. 1. 1.
|
|
0.98989899 0.98989899 0.98979592 1. ]
|
|
|
|
mean value: 0.9939290867862296
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.98989899 0.98 1. 1. 1.
|
|
0.99009901 0.99009901 0.98989899 1. ]
|
|
|
|
mean value: 0.993999599959996
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.98 0.96078431 1. 1. 1.
|
|
0.98039216 0.98039216 0.98 1. ]
|
|
|
|
mean value: 0.9881568627450981
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.99 0.98 1. 1. 1.
|
|
0.98979592 0.98979592 0.98979592 1. ]
|
|
|
|
mean value: 0.993938775510204
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.98 0.96078431 1. 1. 1.
|
|
0.98039216 0.98039216 0.98 1. ]
|
|
|
|
mean value: 0.9881568627450981
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01314187 0.01279283 0.01262784 0.0126195 0.01293945 0.01310587
|
|
0.01345801 0.01343894 0.0133245 0.01199412]
|
|
|
|
mean value: 0.012944293022155762
|
|
|
|
key: score_time
|
|
value: [0.0095377 0.00988412 0.00978994 0.00964975 0.00939465 0.00923729
|
|
0.00992608 0.00950336 0.00993872 0.00916696]
|
|
|
|
mean value: 0.009602856636047364
|
|
|
|
key: test_mcc
|
|
value: [0.86746758 0.92226137 0.88543774 0.96039208 0.885171 0.9035079
|
|
0.92213889 0.92213889 0.95998366 0.94053994]
|
|
|
|
mean value: 0.916903906054164
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92929293 0.95959596 0.93939394 0.97979798 0.93939394 0.94949495
|
|
0.95959596 0.95959596 0.97959184 0.96938776]
|
|
|
|
mean value: 0.9565141207998351
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.96078431 0.94230769 0.98 0.94339623 0.95238095
|
|
0.96153846 0.96153846 0.98 0.97029703]
|
|
|
|
mean value: 0.9585576470942456
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.875 0.9245283 0.89090909 0.96078431 0.89285714 0.90909091
|
|
0.92592593 0.92592593 0.96078431 0.94230769]
|
|
|
|
mean value: 0.920811361635446
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.93 0.96 0.94 0.98 0.93877551 0.94897959
|
|
0.95918367 0.95918367 0.97959184 0.96938776]
|
|
|
|
mean value: 0.9565102040816327
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.9245283 0.89090909 0.96078431 0.89285714 0.90909091
|
|
0.92592593 0.92592593 0.96078431 0.94230769]
|
|
|
|
mean value: 0.920811361635446
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.32685018 2.30853224 2.29484224 2.2896266 2.3087759 2.30795121
|
|
2.31662416 2.28803205 2.32145357 2.32244158]
|
|
|
|
mean value: 2.3085129737854
|
|
|
|
key: score_time
|
|
value: [0.10511684 0.09732866 0.10092854 0.0974431 0.10452056 0.10459495
|
|
0.1027 0.10587049 0.10252953 0.09685755]
|
|
|
|
mean value: 0.10178902149200439
|
|
|
|
key: test_mcc
|
|
value: [0.96039208 0.98 0.96039208 1. 1. 0.97999192
|
|
0.96036035 0.97999192 0.9797959 1. ]
|
|
|
|
mean value: 0.9800924226675874
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97979798 0.98989899 0.97979798 1. 1. 0.98989899
|
|
0.97979798 0.98989899 0.98979592 1. ]
|
|
|
|
mean value: 0.9898886827458256
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98 0.98989899 0.98 1. 1. 0.99009901
|
|
0.98039216 0.99009901 0.98989899 1. ]
|
|
|
|
mean value: 0.9900388156462705
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96078431 0.98 0.96078431 1. 1. 0.98039216
|
|
0.96153846 0.98039216 0.98 1. ]
|
|
|
|
mean value: 0.9803891402714933
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.99 0.98 1. 1. 0.98979592
|
|
0.97959184 0.98979592 0.98979592 1. ]
|
|
|
|
mean value: 0.9898979591836734
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96078431 0.98 0.96078431 1. 1. 0.98039216
|
|
0.96153846 0.98039216 0.98 1. ]
|
|
|
|
mean value: 0.9803891402714933
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.55
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.07593513 1.07448363 1.07998729 1.07404733 1.10889888 1.11362147
|
|
1.06546926 1.15706944 1.11698508 1.16285443]
|
|
|
|
mean value: 1.1029351949691772
|
|
|
|
key: score_time
|
|
value: [0.25335073 0.24149394 0.24134088 0.1889143 0.28028631 0.27490306
|
|
0.28356004 0.26948714 0.26369286 0.1434238 ]
|
|
|
|
mean value: 0.24404530525207518
|
|
|
|
key: test_mcc
|
|
value: [0.96039208 0.93956725 0.94115314 0.98 1. 0.97999192
|
|
0.94108303 0.96036035 0.95998366 0.9797959 ]
|
|
|
|
mean value: 0.9642327314572875
|
|
|
|
key: train_mcc
|
|
value: [0.99104115 0.98427425 0.99327341 0.98881381 0.98881409 0.98659176
|
|
0.99104133 0.98881409 0.99105127 0.98882646]
|
|
|
|
mean value: 0.9892541629490832
|
|
|
|
key: test_accuracy
|
|
value: [0.97979798 0.96969697 0.96969697 0.98989899 1. 0.98989899
|
|
0.96969697 0.97979798 0.97959184 0.98979592]
|
|
|
|
mean value: 0.9817872603586889
|
|
|
|
key: train_accuracy
|
|
value: [0.99550056 0.99212598 0.99662542 0.9943757 0.9943757 0.99325084
|
|
0.99550056 0.9943757 0.99550562 0.99438202]
|
|
|
|
mean value: 0.9946018124138977
|
|
|
|
key: test_fscore
|
|
value: [0.98 0.96907216 0.97029703 0.98989899 1. 0.99009901
|
|
0.97087379 0.98039216 0.98 0.98989899]
|
|
|
|
mean value: 0.9820532127620906
|
|
|
|
key: train_fscore
|
|
value: [0.99552573 0.99216125 0.99664054 0.99441341 0.9944009 0.99328859
|
|
0.9955157 0.9944009 0.99552573 0.99441341]
|
|
|
|
mean value: 0.9946286138879104
|
|
|
|
key: test_precision
|
|
value: [0.96078431 0.97916667 0.94230769 0.98 1. 0.98039216
|
|
0.94339623 0.96153846 0.96078431 0.98 ]
|
|
|
|
mean value: 0.968836983124164
|
|
|
|
key: train_precision
|
|
value: [0.99109131 0.98883929 0.99330357 0.98888889 0.98886414 0.98666667
|
|
0.99107143 0.98886414 0.99109131 0.98888889]
|
|
|
|
mean value: 0.9897569643299042
|
|
|
|
key: test_recall
|
|
value: [1. 0.95918367 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9959183673469387
|
|
|
|
key: train_recall
|
|
value: [1. 0.99550562 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9995505617977528
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.96959184 0.97 0.99 1. 0.98979592
|
|
0.96938776 0.97959184 0.97959184 0.98979592]
|
|
|
|
mean value: 0.9817755102040816
|
|
|
|
key: train_roc_auc
|
|
value: [0.9954955 0.99212218 0.99662162 0.99436937 0.99438202 0.99325843
|
|
0.99550562 0.99438202 0.99550562 0.99438202]
|
|
|
|
mean value: 0.9946024395181698
|
|
|
|
key: test_jcc
|
|
value: [0.96078431 0.94 0.94230769 0.98 1. 0.98039216
|
|
0.94339623 0.96153846 0.96078431 0.98 ]
|
|
|
|
mean value: 0.9649203164574973
|
|
|
|
key: train_jcc
|
|
value: [0.99109131 0.98444444 0.99330357 0.98888889 0.98886414 0.98666667
|
|
0.99107143 0.98886414 0.99109131 0.98888889]
|
|
|
|
mean value: 0.9893174802029201
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02743602 0.01632857 0.01631403 0.01635146 0.01628733 0.0164156
|
|
0.01630664 0.01633143 0.01638961 0.01629639]
|
|
|
|
mean value: 0.017445707321166994
|
|
|
|
key: score_time
|
|
value: [0.0134027 0.01226759 0.01244688 0.01224279 0.01223731 0.01219273
|
|
0.01228523 0.01232171 0.0122571 0.01234198]
|
|
|
|
mean value: 0.012399601936340331
|
|
|
|
key: test_mcc
|
|
value: [0.5365758 0.31303661 0.5355102 0.597546 0.57651565 0.47486754
|
|
0.55578301 0.47758063 0.55147997 0.51407258]
|
|
|
|
mean value: 0.5132967995255227
|
|
|
|
key: train_mcc
|
|
value: [0.55070563 0.56153007 0.56203173 0.54608634 0.55026989 0.55005896
|
|
0.5275846 0.54555623 0.55304503 0.54383258]
|
|
|
|
mean value: 0.549070106228705
|
|
|
|
key: test_accuracy
|
|
value: [0.76767677 0.65656566 0.76767677 0.7979798 0.78787879 0.73737374
|
|
0.77777778 0.73737374 0.7755102 0.75510204]
|
|
|
|
mean value: 0.7560915275200989
|
|
|
|
key: train_accuracy
|
|
value: [0.77502812 0.78065242 0.78065242 0.7727784 0.77502812 0.77502812
|
|
0.76377953 0.7727784 0.77640449 0.77191011]
|
|
|
|
mean value: 0.7744040141049785
|
|
|
|
key: test_fscore
|
|
value: [0.77227723 0.64583333 0.76767677 0.78723404 0.7961165 0.74509804
|
|
0.78431373 0.75471698 0.77083333 0.76923077]
|
|
|
|
mean value: 0.7593330724542494
|
|
|
|
key: train_fscore
|
|
value: [0.78070175 0.78405316 0.7864184 0.77802198 0.77777778 0.77426637
|
|
0.76457399 0.77252252 0.77962348 0.77267637]
|
|
|
|
mean value: 0.7770635795528944
|
|
|
|
key: test_precision
|
|
value: [0.75 0.65957447 0.76 0.82222222 0.77358491 0.73076923
|
|
0.76923077 0.71428571 0.78723404 0.72727273]
|
|
|
|
mean value: 0.7494174080079339
|
|
|
|
key: train_precision
|
|
value: [0.76231263 0.77292576 0.76709402 0.76129032 0.76754386 0.7760181
|
|
0.76116071 0.77252252 0.76855895 0.77008929]
|
|
|
|
mean value: 0.7679516171384001
|
|
|
|
key: test_recall
|
|
value: [0.79591837 0.63265306 0.7755102 0.75510204 0.82 0.76
|
|
0.8 0.8 0.75510204 0.81632653]
|
|
|
|
mean value: 0.7710612244897959
|
|
|
|
key: train_recall
|
|
value: [0.8 0.79550562 0.80674157 0.79550562 0.78828829 0.77252252
|
|
0.76801802 0.77252252 0.79101124 0.7752809 ]
|
|
|
|
mean value: 0.7865396295171576
|
|
|
|
key: test_roc_auc
|
|
value: [0.76795918 0.65632653 0.7677551 0.79755102 0.78755102 0.73714286
|
|
0.77755102 0.73673469 0.7755102 0.75510204]
|
|
|
|
mean value: 0.7559183673469387
|
|
|
|
key: train_roc_auc
|
|
value: [0.775 0.78063569 0.78062304 0.77275281 0.77504302 0.77502531
|
|
0.76378429 0.77277812 0.77640449 0.77191011]
|
|
|
|
mean value: 0.7743956878226541
|
|
|
|
key: test_jcc
|
|
value: [0.62903226 0.47692308 0.62295082 0.64912281 0.66129032 0.59375
|
|
0.64516129 0.60606061 0.62711864 0.625 ]
|
|
|
|
mean value: 0.6136409824708896
|
|
|
|
key: train_jcc
|
|
value: [0.64028777 0.64480874 0.64801444 0.63669065 0.63636364 0.63167587
|
|
0.61887477 0.6293578 0.63883848 0.62956204]
|
|
|
|
mean value: 0.635447420260183
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.12911439 0.12384319 0.11070585 0.10750079 0.11122203 0.10650539
|
|
0.10826492 0.23588061 0.45501494 0.10379076]
|
|
|
|
mean value: 0.15918428897857667
|
|
|
|
key: score_time
|
|
value: [0.01141906 0.0133872 0.01122379 0.01129675 0.01144004 0.01128483
|
|
0.01124716 0.01270914 0.01174188 0.01141691]
|
|
|
|
mean value: 0.011716675758361817
|
|
|
|
key: test_mcc
|
|
value: [0.94115314 0.98 0.94115314 1. 1. 0.9035079
|
|
0.96036035 1. 0.94053994 0.95998366]
|
|
|
|
mean value: 0.9626698129491423
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.98989899 0.96969697 1. 1. 0.94949495
|
|
0.97979798 1. 0.96938776 0.97959184]
|
|
|
|
mean value: 0.9807565450422593
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97029703 0.98989899 0.97029703 1. 1. 0.95238095
|
|
0.98039216 1. 0.97029703 0.98 ]
|
|
|
|
mean value: 0.9813563188251598
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94230769 0.98 0.94230769 1. 1. 0.90909091
|
|
0.96153846 1. 0.94230769 0.96078431]
|
|
|
|
mean value: 0.9638336761277938
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97 0.99 0.97 1. 1. 0.94897959
|
|
0.97959184 1. 0.96938776 0.97959184]
|
|
|
|
mean value: 0.9807551020408163
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94230769 0.98 0.94230769 1. 1. 0.90909091
|
|
0.96153846 1. 0.94230769 0.96078431]
|
|
|
|
mean value: 0.9638336761277938
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.05297399 0.06838965 0.12043762 0.05755877 0.07551694 0.05854273
|
|
0.1428442 0.12786603 0.09089589 0.0710535 ]
|
|
|
|
mean value: 0.0866079330444336
|
|
|
|
key: score_time
|
|
value: [0.01945043 0.0287354 0.0185883 0.01245427 0.0124898 0.01918125
|
|
0.02129316 0.02139568 0.01937938 0.01261044]
|
|
|
|
mean value: 0.018557810783386232
|
|
|
|
key: test_mcc
|
|
value: [0.80073891 0.85877551 0.80412203 0.8412464 0.80041656 0.773064
|
|
0.9035079 0.74701788 0.88420483 0.78354679]
|
|
|
|
mean value: 0.8196640815085476
|
|
|
|
key: train_mcc
|
|
value: [0.87979116 0.89404404 0.90067429 0.90545942 0.87814502 0.89568737
|
|
0.88904331 0.89332285 0.9079686 0.88188139]
|
|
|
|
mean value: 0.8926017466386073
|
|
|
|
key: test_accuracy
|
|
value: [0.8989899 0.92929293 0.8989899 0.91919192 0.8989899 0.87878788
|
|
0.94949495 0.85858586 0.93877551 0.8877551 ]
|
|
|
|
mean value: 0.905885384456813
|
|
|
|
key: train_accuracy
|
|
value: [0.93925759 0.94600675 0.94938133 0.95163105 0.93813273 0.94713161
|
|
0.94375703 0.94600675 0.95280899 0.94044944]
|
|
|
|
mean value: 0.9454563263861681
|
|
|
|
key: test_fscore
|
|
value: [0.90196078 0.92929293 0.90384615 0.92156863 0.90384615 0.89090909
|
|
0.95238095 0.87719298 0.94230769 0.8952381 ]
|
|
|
|
mean value: 0.9118543462041914
|
|
|
|
key: train_fscore
|
|
value: [0.94091904 0.94782609 0.95103373 0.95331162 0.94002181 0.94852136
|
|
0.9452954 0.94736842 0.95444685 0.94182217]
|
|
|
|
mean value: 0.947056649665704
|
|
|
|
key: test_precision
|
|
value: [0.86792453 0.92 0.85454545 0.88679245 0.87037037 0.81666667
|
|
0.90909091 0.78125 0.89090909 0.83928571]
|
|
|
|
mean value: 0.8636835187000281
|
|
|
|
key: train_precision
|
|
value: [0.91684435 0.91789474 0.92194093 0.92226891 0.91120507 0.92324094
|
|
0.91914894 0.92307692 0.92243187 0.92060086]
|
|
|
|
mean value: 0.9198653517961752
|
|
|
|
key: test_recall
|
|
value: [0.93877551 0.93877551 0.95918367 0.95918367 0.94 0.98
|
|
1. 1. 1. 0.95918367]
|
|
|
|
mean value: 0.9675102040816327
|
|
|
|
key: train_recall
|
|
value: [0.96629213 0.97977528 0.98202247 0.98651685 0.97072072 0.97522523
|
|
0.97297297 0.97297297 0.98876404 0.96404494]
|
|
|
|
mean value: 0.9759307622228971
|
|
|
|
key: test_roc_auc
|
|
value: [0.89938776 0.92938776 0.89959184 0.91959184 0.89857143 0.8777551
|
|
0.94897959 0.85714286 0.93877551 0.8877551 ]
|
|
|
|
mean value: 0.9056938775510205
|
|
|
|
key: train_roc_auc
|
|
value: [0.93922715 0.94596872 0.94934457 0.95159176 0.93816935 0.94716317
|
|
0.94378986 0.94603705 0.95280899 0.94044944]
|
|
|
|
mean value: 0.9454550055673652
|
|
|
|
key: test_jcc
|
|
value: [0.82142857 0.86792453 0.8245614 0.85454545 0.8245614 0.80327869
|
|
0.90909091 0.78125 0.89090909 0.81034483]
|
|
|
|
mean value: 0.8387894877404254
|
|
|
|
key: train_jcc
|
|
value: [0.88842975 0.90082645 0.906639 0.91078838 0.88683128 0.90208333
|
|
0.89626556 0.9 0.91286307 0.89004149]
|
|
|
|
mean value: 0.8994768317774049
|
|
|
|
MCC on Blind test: 0.51
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02602434 0.01596308 0.01641655 0.01636362 0.01584506 0.01581049
|
|
0.01583481 0.01604867 0.0157218 0.0158546 ]
|
|
|
|
mean value: 0.016988301277160646
|
|
|
|
key: score_time
|
|
value: [0.01319695 0.01251769 0.01246452 0.01245952 0.01223183 0.01224542
|
|
0.01221228 0.01219058 0.01225233 0.01216555]
|
|
|
|
mean value: 0.012393665313720704
|
|
|
|
key: test_mcc
|
|
value: [0.59985774 0.49692935 0.597546 0.61702314 0.55614541 0.57591837
|
|
0.59591837 0.64061678 0.53072278 0.51062961]
|
|
|
|
mean value: 0.5721307535459735
|
|
|
|
key: train_mcc
|
|
value: [0.60857704 0.57744364 0.60629925 0.62436726 0.61983145 0.58830139
|
|
0.58607914 0.58618911 0.55957317 0.61576921]
|
|
|
|
mean value: 0.5972430660263615
|
|
|
|
key: test_accuracy
|
|
value: [0.7979798 0.74747475 0.7979798 0.80808081 0.77777778 0.78787879
|
|
0.7979798 0.81818182 0.76530612 0.75510204]
|
|
|
|
mean value: 0.785374149659864
|
|
|
|
key: train_accuracy
|
|
value: [0.80427447 0.78852643 0.80314961 0.81214848 0.80989876 0.79415073
|
|
0.79302587 0.79302587 0.77977528 0.80786517]
|
|
|
|
mean value: 0.7985840674410081
|
|
|
|
key: test_fscore
|
|
value: [0.7826087 0.7311828 0.78723404 0.8 0.7755102 0.78787879
|
|
0.8 0.83018868 0.76767677 0.76 ]
|
|
|
|
mean value: 0.7822279972786761
|
|
|
|
key: train_fscore
|
|
value: [0.80361174 0.78489703 0.80359147 0.81382386 0.80860702 0.79368658
|
|
0.79372197 0.7904328 0.77878104 0.80677966]
|
|
|
|
mean value: 0.797793317069756
|
|
|
|
key: test_precision
|
|
value: [0.8372093 0.77272727 0.82222222 0.82608696 0.79166667 0.79591837
|
|
0.8 0.78571429 0.76 0.74509804]
|
|
|
|
mean value: 0.7936643112740392
|
|
|
|
key: train_precision
|
|
value: [0.80725624 0.7995338 0.80269058 0.80752212 0.81321185 0.79458239
|
|
0.79017857 0.79953917 0.78231293 0.81136364]
|
|
|
|
mean value: 0.8008191283563129
|
|
|
|
key: test_recall
|
|
value: [0.73469388 0.69387755 0.75510204 0.7755102 0.76 0.78
|
|
0.8 0.88 0.7755102 0.7755102 ]
|
|
|
|
mean value: 0.7730204081632653
|
|
|
|
key: train_recall
|
|
value: [0.8 0.77078652 0.80449438 0.82022472 0.80405405 0.79279279
|
|
0.7972973 0.78153153 0.7752809 0.80224719]
|
|
|
|
mean value: 0.7948709383540844
|
|
|
|
key: test_roc_auc
|
|
value: [0.79734694 0.74693878 0.79755102 0.8077551 0.77795918 0.78795918
|
|
0.79795918 0.81755102 0.76530612 0.75510204]
|
|
|
|
mean value: 0.7851428571428571
|
|
|
|
key: train_roc_auc
|
|
value: [0.80427928 0.78854641 0.80314809 0.81213939 0.8098922 0.79414921
|
|
0.79303067 0.79301296 0.77977528 0.80786517]
|
|
|
|
mean value: 0.7985838647636401
|
|
|
|
key: test_jcc
|
|
value: [0.64285714 0.57627119 0.64912281 0.66666667 0.63333333 0.65
|
|
0.66666667 0.70967742 0.62295082 0.61290323]
|
|
|
|
mean value: 0.6430449267815452
|
|
|
|
key: train_jcc
|
|
value: [0.67169811 0.64595104 0.67166979 0.68609023 0.67870722 0.65794393
|
|
0.65799257 0.65348399 0.63770795 0.67613636]
|
|
|
|
mean value: 0.6637381187145421
|
|
|
|
MCC on Blind test: 0.53
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02911568 0.02824807 0.03060579 0.02883339 0.03068709 0.0293889
|
|
0.02756166 0.03856134 0.0417738 0.0363481 ]
|
|
|
|
mean value: 0.03211238384246826
|
|
|
|
key: score_time
|
|
value: [0.01238871 0.0123229 0.01254773 0.01233053 0.01258206 0.01234341
|
|
0.01238132 0.01236963 0.01256609 0.01240015]
|
|
|
|
mean value: 0.012423253059387207
|
|
|
|
key: test_mcc
|
|
value: [0.71419992 0.73207752 0.68524605 0.64902293 0.83836735 0.61967734
|
|
0.75807168 0.66433509 0.8660254 0.85875386]
|
|
|
|
mean value: 0.7385777135283248
|
|
|
|
key: train_mcc
|
|
value: [0.75136648 0.79725751 0.80649573 0.75153018 0.84500822 0.79438266
|
|
0.81362727 0.64852975 0.87160975 0.87192993]
|
|
|
|
mean value: 0.795173747480457
|
|
|
|
key: test_accuracy
|
|
value: [0.84848485 0.84848485 0.83838384 0.81818182 0.91919192 0.80808081
|
|
0.87878788 0.81818182 0.92857143 0.92857143]
|
|
|
|
mean value: 0.8634920634920635
|
|
|
|
key: train_accuracy
|
|
value: [0.86839145 0.88863892 0.89876265 0.87064117 0.92125984 0.89426322
|
|
0.90663667 0.80202475 0.93258427 0.93595506]
|
|
|
|
mean value: 0.8919157998508613
|
|
|
|
key: test_fscore
|
|
value: [0.82758621 0.86725664 0.82222222 0.79545455 0.92 0.8
|
|
0.88235294 0.79069767 0.93333333 0.93069307]
|
|
|
|
mean value: 0.8569596629976801
|
|
|
|
key: train_fscore
|
|
value: [0.85429639 0.89989889 0.8907767 0.85924113 0.92407809 0.88729017
|
|
0.90788013 0.75757576 0.93644068 0.93573844]
|
|
|
|
mean value: 0.885321637330162
|
|
|
|
key: test_precision
|
|
value: [0.94736842 0.765625 0.90243902 0.8974359 0.92 0.84444444
|
|
0.86538462 0.94444444 0.875 0.90384615]
|
|
|
|
mean value: 0.8865988000998432
|
|
|
|
key: train_precision
|
|
value: [0.95810056 0.81801471 0.96833773 0.94354839 0.89121339 0.94871795
|
|
0.89496718 0.9751773 0.88577154 0.93891403]
|
|
|
|
mean value: 0.9222762772791266
|
|
|
|
key: test_recall
|
|
value: [0.73469388 1. 0.75510204 0.71428571 0.92 0.76
|
|
0.9 0.68 1. 0.95918367]
|
|
|
|
mean value: 0.8423265306122449
|
|
|
|
key: train_recall
|
|
value: [0.77078652 1. 0.8247191 0.78876404 0.95945946 0.83333333
|
|
0.92117117 0.61936937 0.99325843 0.93258427]
|
|
|
|
mean value: 0.8643445692883895
|
|
|
|
key: test_roc_auc
|
|
value: [0.84734694 0.85 0.83755102 0.81714286 0.91918367 0.80857143
|
|
0.87857143 0.81959184 0.92857143 0.92857143]
|
|
|
|
mean value: 0.8635102040816327
|
|
|
|
key: train_roc_auc
|
|
value: [0.86850137 0.88851351 0.89884604 0.87073337 0.92130276 0.89419476
|
|
0.906653 0.80181952 0.93258427 0.93595506]
|
|
|
|
mean value: 0.8919103654216014
|
|
|
|
key: test_jcc
|
|
value: [0.70588235 0.765625 0.69811321 0.66037736 0.85185185 0.66666667
|
|
0.78947368 0.65384615 0.875 0.87037037]
|
|
|
|
mean value: 0.7537206645924481
|
|
|
|
key: train_jcc
|
|
value: [0.74565217 0.81801471 0.80306346 0.75321888 0.85887097 0.79741379
|
|
0.83130081 0.6097561 0.88047809 0.87923729]
|
|
|
|
mean value: 0.7977006268445469
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03706861 0.0358808 0.0361433 0.0313592 0.03414869 0.02584052
|
|
0.0310216 0.03415847 0.0306828 0.02926207]
|
|
|
|
mean value: 0.03255660533905029
|
|
|
|
key: score_time
|
|
value: [0.01240444 0.01232743 0.01231074 0.01245427 0.01246023 0.01232338
|
|
0.01232743 0.01231575 0.01232076 0.01230383]
|
|
|
|
mean value: 0.012354826927185059
|
|
|
|
key: test_mcc
|
|
value: [0.66249814 0.62857143 0.7 0.86023767 0.90057555 0.6682599
|
|
0.74139967 0.66607486 0.81302949 0.7240002 ]
|
|
|
|
mean value: 0.736464691070464
|
|
|
|
key: train_mcc
|
|
value: [0.72556276 0.78345376 0.82973135 0.89075253 0.86394419 0.8207096
|
|
0.8115728 0.74908368 0.85154948 0.72212216]
|
|
|
|
mean value: 0.8048482312589154
|
|
|
|
key: test_accuracy
|
|
value: [0.81818182 0.7979798 0.82828283 0.92929293 0.94949495 0.82828283
|
|
0.86868687 0.80808081 0.89795918 0.85714286]
|
|
|
|
mean value: 0.8583384869099155
|
|
|
|
key: train_accuracy
|
|
value: [0.8503937 0.8863892 0.90776153 0.94488189 0.928009 0.90776153
|
|
0.90438695 0.85939258 0.92134831 0.8494382 ]
|
|
|
|
mean value: 0.8959762894806689
|
|
|
|
key: test_fscore
|
|
value: [0.78571429 0.75609756 0.85217391 0.93069307 0.95145631 0.81318681
|
|
0.86315789 0.84033613 0.90740741 0.84444444]
|
|
|
|
mean value: 0.8544667833949204
|
|
|
|
key: train_fscore
|
|
value: [0.8283871 0.87637699 0.91563786 0.94621295 0.93248945 0.90214797
|
|
0.90011751 0.87660415 0.92662474 0.8277635 ]
|
|
|
|
mean value: 0.8932362210480187
|
|
|
|
key: test_precision
|
|
value: [0.94285714 0.93939394 0.74242424 0.90384615 0.9245283 0.90243902
|
|
0.91111111 0.72463768 0.83050847 0.92682927]
|
|
|
|
mean value: 0.8748575339938001
|
|
|
|
key: train_precision
|
|
value: [0.97272727 0.96236559 0.84440228 0.9248927 0.87698413 0.95939086
|
|
0.94103194 0.78031634 0.86836935 0.96696697]
|
|
|
|
mean value: 0.9097447439088742
|
|
|
|
key: test_recall
|
|
value: [0.67346939 0.63265306 1. 0.95918367 0.98 0.74
|
|
0.82 1. 1. 0.7755102 ]
|
|
|
|
mean value: 0.8580816326530613
|
|
|
|
key: train_recall
|
|
value: [0.72134831 0.80449438 1. 0.96853933 0.9954955 0.85135135
|
|
0.86261261 1. 0.99325843 0.72359551]
|
|
|
|
mean value: 0.8920695414515639
|
|
|
|
key: test_roc_auc
|
|
value: [0.81673469 0.79632653 0.83 0.92959184 0.94918367 0.82918367
|
|
0.86918367 0.80612245 0.89795918 0.85714286]
|
|
|
|
mean value: 0.8581428571428571
|
|
|
|
key: train_roc_auc
|
|
value: [0.85053902 0.88648143 0.90765766 0.94485525 0.92808483 0.90769815
|
|
0.90434001 0.85955056 0.92134831 0.8494382 ]
|
|
|
|
mean value: 0.8959993420386678
|
|
|
|
key: test_jcc
|
|
value: [0.64705882 0.60784314 0.74242424 0.87037037 0.90740741 0.68518519
|
|
0.75925926 0.72463768 0.83050847 0.73076923]
|
|
|
|
mean value: 0.7505463811935701
|
|
|
|
key: train_jcc
|
|
value: [0.70704846 0.77995643 0.84440228 0.89791667 0.87351779 0.82173913
|
|
0.81837607 0.78031634 0.86328125 0.70614035]
|
|
|
|
mean value: 0.8092694759584826
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.26323581 0.24968505 0.25807214 0.24491382 0.24431825 0.24463201
|
|
0.24448776 0.24209285 0.27050495 0.24602413]
|
|
|
|
mean value: 0.25079667568206787
|
|
|
|
key: score_time
|
|
value: [0.01670289 0.01700497 0.01596355 0.01601624 0.01584506 0.01608944
|
|
0.01593876 0.01598811 0.01581001 0.01592469]
|
|
|
|
mean value: 0.01612837314605713
|
|
|
|
key: test_mcc
|
|
value: [0.92226137 0.93959184 0.90369611 0.93959184 0.95959184 0.86710997
|
|
0.9035079 0.84930737 0.94053994 0.90267093]
|
|
|
|
mean value: 0.9127869117325943
|
|
|
|
key: train_mcc
|
|
value: [0.97116812 0.97766069 0.97774969 0.97324899 0.95548117 0.98437429
|
|
0.9755525 0.96898572 0.9821812 0.97338769]
|
|
|
|
mean value: 0.9739790054719715
|
|
|
|
key: test_accuracy
|
|
value: [0.95959596 0.96969697 0.94949495 0.96969697 0.97979798 0.92929293
|
|
0.94949495 0.91919192 0.96938776 0.94897959]
|
|
|
|
mean value: 0.9544629973201402
|
|
|
|
key: train_accuracy
|
|
value: [0.98537683 0.98875141 0.98875141 0.98650169 0.97750281 0.99212598
|
|
0.98762655 0.98425197 0.99101124 0.98651685]
|
|
|
|
mean value: 0.9868416728807775
|
|
|
|
key: test_fscore
|
|
value: [0.96078431 0.96969697 0.95145631 0.96969697 0.98 0.93457944
|
|
0.95238095 0.92592593 0.97029703 0.95145631]
|
|
|
|
mean value: 0.9566274221740838
|
|
|
|
key: train_fscore
|
|
value: [0.98560354 0.98886414 0.98888889 0.98666667 0.97782705 0.99217877
|
|
0.98776418 0.98447894 0.99109131 0.98669623]
|
|
|
|
mean value: 0.9870059726538327
|
|
|
|
key: test_precision
|
|
value: [0.9245283 0.96 0.90740741 0.96 0.98 0.87719298
|
|
0.90909091 0.86206897 0.94230769 0.90740741]
|
|
|
|
mean value: 0.9230003666073591
|
|
|
|
key: train_precision
|
|
value: [0.97161572 0.98013245 0.97802198 0.97582418 0.9628821 0.98447894
|
|
0.97582418 0.96943231 0.98233996 0.97374179]
|
|
|
|
mean value: 0.9754293596864881
|
|
|
|
key: test_recall
|
|
value: [1. 0.97959184 1. 0.97959184 0.98 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9939183673469387
|
|
|
|
key: train_recall
|
|
value: [1. 0.99775281 1. 0.99775281 0.99324324 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9988748861220771
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 0.96979592 0.95 0.96979592 0.97979592 0.92857143
|
|
0.94897959 0.91836735 0.96938776 0.94897959]
|
|
|
|
mean value: 0.9543673469387756
|
|
|
|
key: train_roc_auc
|
|
value: [0.98536036 0.98874127 0.98873874 0.98648902 0.9775205 0.99213483
|
|
0.98764045 0.98426966 0.99101124 0.98651685]
|
|
|
|
mean value: 0.9868422917299322
|
|
|
|
key: test_jcc
|
|
value: [0.9245283 0.94117647 0.90740741 0.94117647 0.96078431 0.87719298
|
|
0.90909091 0.86206897 0.94230769 0.90740741]
|
|
|
|
mean value: 0.9173140920975551
|
|
|
|
key: train_jcc
|
|
value: [0.97161572 0.97797357 0.97802198 0.97368421 0.95661605 0.98447894
|
|
0.97582418 0.96943231 0.98233996 0.97374179]
|
|
|
|
mean value: 0.9743728705508703
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.20577955 0.22343469 0.22129583 0.22863293 0.20965981 0.21812749
|
|
0.2192595 0.22341514 0.22959924 0.22265434]
|
|
|
|
mean value: 0.22018585205078126
|
|
|
|
key: score_time
|
|
value: [0.0391283 0.03739238 0.03475857 0.03006387 0.03795433 0.04159594
|
|
0.03979635 0.04038548 0.04014564 0.03675556]
|
|
|
|
mean value: 0.03779764175415039
|
|
|
|
key: test_mcc
|
|
value: [0.92226137 0.98 0.92226137 0.96039208 0.96036035 0.92213889
|
|
0.94108303 0.96036035 0.94053994 0.92144268]
|
|
|
|
mean value: 0.9430840054418462
|
|
|
|
key: train_mcc
|
|
value: [0.9977528 1. 0.9977528 0.9977528 0.99775281 1.
|
|
0.99775281 1. 1. 0.99775533]
|
|
|
|
mean value: 0.9986519339119277
|
|
|
|
key: test_accuracy
|
|
value: [0.95959596 0.98989899 0.95959596 0.97979798 0.97979798 0.95959596
|
|
0.96969697 0.97979798 0.96938776 0.95918367]
|
|
|
|
mean value: 0.9706349206349206
|
|
|
|
key: train_accuracy
|
|
value: [0.99887514 1. 0.99887514 0.99887514 0.99887514 1.
|
|
0.99887514 1. 1. 0.9988764 ]
|
|
|
|
mean value: 0.9993252107531503
|
|
|
|
key: test_fscore
|
|
value: [0.96078431 0.98989899 0.96078431 0.98 0.98039216 0.96153846
|
|
0.97087379 0.98039216 0.97029703 0.96078431]
|
|
|
|
mean value: 0.971574552245015
|
|
|
|
key: train_fscore
|
|
value: [0.99887767 1. 0.99887767 0.99887767 0.99887514 1.
|
|
0.99887514 1. 1. 0.99887767]
|
|
|
|
mean value: 0.9993260943392177
|
|
|
|
key: test_precision
|
|
value: [0.9245283 0.98 0.9245283 0.96078431 0.96153846 0.92592593
|
|
0.94339623 0.96153846 0.94230769 0.9245283 ]
|
|
|
|
mean value: 0.9449075987111504
|
|
|
|
key: train_precision
|
|
value: [0.99775785 1. 0.99775785 0.99775785 0.99775281 1.
|
|
0.99775281 1. 1. 0.99775785]
|
|
|
|
mean value: 0.9986537008112057
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 0.99 0.96 0.98 0.97959184 0.95918367
|
|
0.96938776 0.97959184 0.96938776 0.95918367]
|
|
|
|
mean value: 0.9706326530612245
|
|
|
|
key: train_roc_auc
|
|
value: [0.99887387 1. 0.99887387 0.99887387 0.9988764 1.
|
|
0.9988764 1. 1. 0.9988764 ]
|
|
|
|
mean value: 0.9993250835104768
|
|
|
|
key: test_jcc
|
|
value: [0.9245283 0.98 0.9245283 0.96078431 0.96153846 0.92592593
|
|
0.94339623 0.96153846 0.94230769 0.9245283 ]
|
|
|
|
mean value: 0.9449075987111504
|
|
|
|
key: train_jcc
|
|
value: [0.99775785 1. 0.99775785 0.99775785 0.99775281 1.
|
|
0.99775281 1. 1. 0.99775785]
|
|
|
|
mean value: 0.9986537008112057
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.39124846 0.37915683 0.41166663 0.44417334 0.38366175 0.38395071
|
|
0.35391426 0.46815324 0.36007619 0.38461566]
|
|
|
|
mean value: 0.39606170654296874
|
|
|
|
key: score_time
|
|
value: [0.0231657 0.02200317 0.03856158 0.02258253 0.02176476 0.03803396
|
|
0.02177787 0.04820323 0.0225687 0.02284408]
|
|
|
|
mean value: 0.028150558471679688
|
|
|
|
key: test_mcc
|
|
value: [0.89918367 0.96039208 0.84976777 0.92226137 0.885171 0.94108303
|
|
0.84930737 0.9035079 0.90267093 0.90267093]
|
|
|
|
mean value: 0.9016016065573643
|
|
|
|
key: train_mcc
|
|
value: [0.98877383 0.98881381 0.99327341 0.99104115 0.98659176 0.98437429
|
|
0.98659176 0.99104133 0.98660654 0.98882646]
|
|
|
|
mean value: 0.9885934339656905
|
|
|
|
key: test_accuracy
|
|
value: [0.94949495 0.97979798 0.91919192 0.95959596 0.93939394 0.96969697
|
|
0.91919192 0.94949495 0.94897959 0.94897959]
|
|
|
|
mean value: 0.9483817769532056
|
|
|
|
key: train_accuracy
|
|
value: [0.9943757 0.9943757 0.99662542 0.99550056 0.99325084 0.99212598
|
|
0.99325084 0.99550056 0.99325843 0.99438202]
|
|
|
|
mean value: 0.9942646073735165
|
|
|
|
key: test_fscore
|
|
value: [0.94949495 0.98 0.9245283 0.96078431 0.94339623 0.97087379
|
|
0.92592593 0.95238095 0.95145631 0.95145631]
|
|
|
|
mean value: 0.9510297077596195
|
|
|
|
key: train_fscore
|
|
value: [0.9944009 0.99441341 0.99664054 0.99552573 0.99328859 0.99217877
|
|
0.99328859 0.9955157 0.99330357 0.99441341]
|
|
|
|
mean value: 0.994296919473608
|
|
|
|
key: test_precision
|
|
value: [0.94 0.96078431 0.85964912 0.9245283 0.89285714 0.94339623
|
|
0.86206897 0.90909091 0.90740741 0.90740741]
|
|
|
|
mean value: 0.9107189797114503
|
|
|
|
key: train_precision
|
|
value: [0.99107143 0.98888889 0.99330357 0.99109131 0.98666667 0.98447894
|
|
0.98666667 0.99107143 0.98669623 0.98888889]
|
|
|
|
mean value: 0.9888824020010838
|
|
|
|
key: test_recall
|
|
value: [0.95918367 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9959183673469387
|
|
|
|
key: train_recall
|
|
value: [0.99775281 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9997752808988765
|
|
|
|
key: test_roc_auc
|
|
value: [0.94959184 0.98 0.92 0.96 0.93877551 0.96938776
|
|
0.91836735 0.94897959 0.94897959 0.94897959]
|
|
|
|
mean value: 0.9483061224489796
|
|
|
|
key: train_roc_auc
|
|
value: [0.9943719 0.99436937 0.99662162 0.9954955 0.99325843 0.99213483
|
|
0.99325843 0.99550562 0.99325843 0.99438202]
|
|
|
|
mean value: 0.9942656139285353
|
|
|
|
key: test_jcc
|
|
value: [0.90384615 0.96078431 0.85964912 0.9245283 0.89285714 0.94339623
|
|
0.86206897 0.90909091 0.90740741 0.90740741]
|
|
|
|
mean value: 0.9071035950960656
|
|
|
|
key: train_jcc
|
|
value: [0.98886414 0.98888889 0.99330357 0.99109131 0.98666667 0.98447894
|
|
0.98666667 0.99107143 0.98669623 0.98888889]
|
|
|
|
mean value: 0.9886616733978385
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.07809114 1.06776786 1.046664 1.04490209 1.04288983 1.0434916
|
|
1.04305387 1.03194952 1.04640913 1.0474987 ]
|
|
|
|
mean value: 1.0492717742919921
|
|
|
|
key: score_time
|
|
value: [0.0101428 0.00945425 0.00942683 0.00941992 0.00957966 0.00944257
|
|
0.00946975 0.00951409 0.00959921 0.00940251]
|
|
|
|
mean value: 0.009545159339904786
|
|
|
|
key: test_mcc
|
|
value: [0.94115314 0.96039208 0.90369611 0.98 0.97999192 0.84930737
|
|
0.94108303 0.97999192 0.94053994 0.92144268]
|
|
|
|
mean value: 0.9397598182591206
|
|
|
|
key: train_mcc
|
|
value: [0.9977528 1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. ]
|
|
|
|
mean value: 0.999775279762791
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.97979798 0.94949495 0.98989899 0.98989899 0.91919192
|
|
0.96969697 0.98989899 0.96938776 0.95918367]
|
|
|
|
mean value: 0.9686147186147186
|
|
|
|
key: train_accuracy
|
|
value: [0.99887514 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9998875140607424
|
|
|
|
key: test_fscore
|
|
value: [0.97029703 0.98 0.95145631 0.98989899 0.99009901 0.92592593
|
|
0.97087379 0.99009901 0.97029703 0.96078431]
|
|
|
|
mean value: 0.9699731405845705
|
|
|
|
key: train_fscore
|
|
value: [0.99887767 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9998877665544332
|
|
|
|
key: test_precision
|
|
value: [0.94230769 0.96078431 0.90740741 0.98 0.98039216 0.86206897
|
|
0.94339623 0.98039216 0.94230769 0.9245283 ]
|
|
|
|
mean value: 0.9423584913292901
|
|
|
|
key: train_precision
|
|
value: [0.99775785 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9997757847533633
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97 0.98 0.95 0.99 0.98979592 0.91836735
|
|
0.96938776 0.98979592 0.96938776 0.95918367]
|
|
|
|
mean value: 0.968591836734694
|
|
|
|
key: train_roc_auc
|
|
value: [0.99887387 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9998873873873874
|
|
|
|
key: test_jcc
|
|
value: [0.94230769 0.96078431 0.90740741 0.98 0.98039216 0.86206897
|
|
0.94339623 0.98039216 0.94230769 0.9245283 ]
|
|
|
|
mean value: 0.9423584913292901
|
|
|
|
key: train_jcc
|
|
value: [0.99775785 1. 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9997757847533633
|
|
|
|
MCC on Blind test: 0.71
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0465219 0.05157685 0.04720354 0.04639912 0.04473281 0.04773688
|
|
0.0439477 0.05053854 0.04681659 0.04573298]
|
|
|
|
mean value: 0.04712069034576416
|
|
|
|
key: score_time
|
|
value: [0.01468301 0.01334667 0.02294636 0.01538634 0.01501942 0.01559162
|
|
0.01431656 0.01549363 0.02266312 0.01899052]
|
|
|
|
mean value: 0.016843724250793456
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 0.98 1. 1. 1.
|
|
1. 0.97999192 1. 1. ]
|
|
|
|
mean value: 0.995999191510005
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 0.98989899 1. 1. 1.
|
|
1. 0.98989899 1. 1. ]
|
|
|
|
mean value: 0.997979797979798
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 0.98989899 1. 1. 1.
|
|
1. 0.99009901 1. 1. ]
|
|
|
|
mean value: 0.997999799979998
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.98 1. 1. 1.
|
|
1. 0.98039216 1. 1. ]
|
|
|
|
mean value: 0.9960392156862745
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 0.99 1. 1. 1.
|
|
1. 0.98979592 1. 1. ]
|
|
|
|
mean value: 0.9979795918367347
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 0.98 1. 1. 1.
|
|
1. 0.98039216 1. 1. ]
|
|
|
|
mean value: 0.9960392156862745
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0273056 0.04202557 0.04229426 0.04192209 0.04645848 0.04385948
|
|
0.04084206 0.03450871 0.04030275 0.04218531]
|
|
|
|
mean value: 0.04017043113708496
|
|
|
|
key: score_time
|
|
value: [0.01933646 0.02441096 0.01946759 0.01935792 0.01965642 0.01993775
|
|
0.01970983 0.01972532 0.0214057 0.01936388]
|
|
|
|
mean value: 0.020237183570861815
|
|
|
|
key: test_mcc
|
|
value: [0.83898714 0.85877551 0.74519047 0.87877551 0.85871792 0.78535389
|
|
0.85871792 0.84930737 0.90267093 0.80195322]
|
|
|
|
mean value: 0.8378449875400339
|
|
|
|
key: train_mcc
|
|
value: [0.88390383 0.90757425 0.89545662 0.8633928 0.88477967 0.89041977
|
|
0.86573291 0.89194373 0.89416909 0.89537958]
|
|
|
|
mean value: 0.8872752259783697
|
|
|
|
key: test_accuracy
|
|
value: [0.91919192 0.92929293 0.86868687 0.93939394 0.92929293 0.88888889
|
|
0.92929293 0.91919192 0.94897959 0.89795918]
|
|
|
|
mean value: 0.9170171098742528
|
|
|
|
key: train_accuracy
|
|
value: [0.94150731 0.95275591 0.94713161 0.93138358 0.94150731 0.94488189
|
|
0.93250844 0.94488189 0.94606742 0.94719101]
|
|
|
|
mean value: 0.9429816357225009
|
|
|
|
key: test_fscore
|
|
value: [0.91666667 0.92929293 0.87619048 0.93877551 0.93069307 0.89719626
|
|
0.93069307 0.92592593 0.95145631 0.90384615]
|
|
|
|
mean value: 0.920073637310195
|
|
|
|
key: train_fscore
|
|
value: [0.94285714 0.95434783 0.94852136 0.93274531 0.94323144 0.94585635
|
|
0.93377483 0.94668118 0.94782609 0.94840834]
|
|
|
|
mean value: 0.9444249875030741
|
|
|
|
key: test_precision
|
|
value: [0.93617021 0.92 0.82142857 0.93877551 0.92156863 0.84210526
|
|
0.92156863 0.86206897 0.90740741 0.85454545]
|
|
|
|
mean value: 0.8925638639928569
|
|
|
|
key: train_precision
|
|
value: [0.92258065 0.92421053 0.92521368 0.91558442 0.91525424 0.92841649
|
|
0.91558442 0.91578947 0.91789474 0.92703863]
|
|
|
|
mean value: 0.9207567238183697
|
|
|
|
key: test_recall
|
|
value: [0.89795918 0.93877551 0.93877551 0.93877551 0.94 0.96
|
|
0.94 1. 1. 0.95918367]
|
|
|
|
mean value: 0.9513469387755102
|
|
|
|
key: train_recall
|
|
value: [0.96404494 0.98651685 0.97303371 0.9505618 0.97297297 0.96396396
|
|
0.9527027 0.97972973 0.97977528 0.97078652]
|
|
|
|
mean value: 0.9694088470492965
|
|
|
|
key: test_roc_auc
|
|
value: [0.91897959 0.92938776 0.86938776 0.93938776 0.92918367 0.88816327
|
|
0.92918367 0.91836735 0.94897959 0.89795918]
|
|
|
|
mean value: 0.9168979591836736
|
|
|
|
key: train_roc_auc
|
|
value: [0.94148193 0.95271789 0.94710244 0.93136198 0.94154267 0.94490333
|
|
0.93253113 0.94492104 0.94606742 0.94719101]
|
|
|
|
mean value: 0.9429820832068023
|
|
|
|
key: test_jcc
|
|
value: [0.84615385 0.86792453 0.77966102 0.88461538 0.87037037 0.81355932
|
|
0.87037037 0.86206897 0.90740741 0.8245614 ]
|
|
|
|
mean value: 0.852669261522833
|
|
|
|
key: train_jcc
|
|
value: [0.89189189 0.91268191 0.90208333 0.87396694 0.89256198 0.89727463
|
|
0.8757764 0.89876033 0.90082645 0.90187891]
|
|
|
|
mean value: 0.8947702785430705
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.33683848 0.37552404 0.41273165 0.38046598 0.49225712 0.34159184
|
|
0.22386527 0.36393523 0.20436549 0.3592937 ]
|
|
|
|
mean value: 0.3490868806838989
|
|
|
|
key: score_time
|
|
value: [0.01950693 0.01995325 0.01967692 0.02090549 0.01942539 0.01946783
|
|
0.01935816 0.02005935 0.02455759 0.01990199]
|
|
|
|
mean value: 0.020281291007995604
|
|
|
|
key: test_mcc
|
|
value: [0.83898714 0.83911808 0.80412203 0.87954274 0.85871792 0.78535389
|
|
0.85871792 0.79728546 0.88420483 0.80195322]
|
|
|
|
mean value: 0.8348003223677418
|
|
|
|
key: train_mcc
|
|
value: [0.88390383 0.90334855 0.89591041 0.89591041 0.88477967 0.89041977
|
|
0.86573291 0.89332285 0.92148675 0.89537958]
|
|
|
|
mean value: 0.8930194733923259
|
|
|
|
key: test_accuracy
|
|
value: [0.91919192 0.91919192 0.8989899 0.93939394 0.92929293 0.88888889
|
|
0.92929293 0.88888889 0.93877551 0.89795918]
|
|
|
|
mean value: 0.9149866007008864
|
|
|
|
key: train_accuracy
|
|
value: [0.94150731 0.95050619 0.94713161 0.94713161 0.94150731 0.94488189
|
|
0.93250844 0.94600675 0.95955056 0.94719101]
|
|
|
|
mean value: 0.9457922675395912
|
|
|
|
key: test_fscore
|
|
value: [0.91666667 0.92 0.90384615 0.94 0.93069307 0.89719626
|
|
0.93069307 0.9009009 0.94230769 0.90384615]
|
|
|
|
mean value: 0.9186149967863672
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./embb_cd_7030.py:156: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_cd_7030.py:159: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.94285714 0.95227766 0.94874591 0.94874591 0.94323144 0.94585635
|
|
0.93377483 0.94736842 0.96095445 0.94840834]
|
|
|
|
mean value: 0.9472220460744264
|
|
|
|
key: test_precision
|
|
value: [0.93617021 0.90196078 0.85454545 0.92156863 0.92156863 0.84210526
|
|
0.92156863 0.81967213 0.89090909 0.85454545]
|
|
|
|
mean value: 0.886461427373806
|
|
|
|
key: train_precision
|
|
value: [0.92258065 0.92033543 0.92161017 0.92161017 0.91525424 0.92841649
|
|
0.91558442 0.92307692 0.92872117 0.92703863]
|
|
|
|
mean value: 0.922422827637706
|
|
|
|
key: test_recall
|
|
value: [0.89795918 0.93877551 0.95918367 0.95918367 0.94 0.96
|
|
0.94 1. 1. 0.95918367]
|
|
|
|
mean value: 0.9554285714285714
|
|
|
|
key: train_recall
|
|
value: [0.96404494 0.98651685 0.97752809 0.97752809 0.97297297 0.96396396
|
|
0.9527027 0.97297297 0.99550562 0.97078652]
|
|
|
|
mean value: 0.9734522724972163
|
|
|
|
key: test_roc_auc
|
|
value: [0.91897959 0.91938776 0.89959184 0.93959184 0.92918367 0.88816327
|
|
0.92918367 0.8877551 0.93877551 0.89795918]
|
|
|
|
mean value: 0.9148571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [0.94148193 0.95046563 0.94709738 0.94709738 0.94154267 0.94490333
|
|
0.93253113 0.94603705 0.95955056 0.94719101]
|
|
|
|
mean value: 0.9457898066605932
|
|
|
|
key: test_jcc
|
|
value: [0.84615385 0.85185185 0.8245614 0.88679245 0.87037037 0.81355932
|
|
0.87037037 0.81967213 0.89090909 0.8245614 ]
|
|
|
|
mean value: 0.8498802242684702
|
|
|
|
key: train_jcc
|
|
value: [0.89189189 0.90890269 0.90248963 0.90248963 0.89256198 0.89727463
|
|
0.8757764 0.9 0.92484342 0.90187891]
|
|
|
|
mean value: 0.8998109188830197
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02711463 0.03046942 0.03095579 0.0293529 0.03025174 0.03120422
|
|
0.03226066 0.02296042 0.03180003 0.0298543 ]
|
|
|
|
mean value: 0.029622411727905272
|
|
|
|
key: score_time
|
|
value: [0.01199937 0.01189494 0.01194811 0.01183605 0.01183653 0.01487279
|
|
0.01187301 0.01176786 0.01473665 0.01184297]
|
|
|
|
mean value: 0.012460827827453613
|
|
|
|
key: test_mcc
|
|
value: [0.77459667 0.25819889 0.37796447 0.5 0.25819889 0.62994079
|
|
0.8819171 0.40451992 1. 0.62994079]
|
|
|
|
mean value: 0.5715277519608599
|
|
|
|
key: train_mcc
|
|
value: [0.80680158 0.87508441 0.83462233 0.80586652 0.84927252 0.84795862
|
|
0.76396258 0.86111111 0.80586652 0.76396258]
|
|
|
|
mean value: 0.821450875728138
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.625 0.6875 0.75 0.625 0.8125 0.9375 0.6875 1. 0.8125]
|
|
|
|
mean value: 0.78125
|
|
|
|
key: train_accuracy
|
|
value: [0.90277778 0.9375 0.91666667 0.90277778 0.92361111 0.92361111
|
|
0.88194444 0.93055556 0.90277778 0.88194444]
|
|
|
|
mean value: 0.9104166666666667
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.57142857 0.66666667 0.75 0.57142857 0.82352941
|
|
0.94117647 0.73684211 1. 0.82352941]
|
|
|
|
mean value: 0.7741744066047471
|
|
|
|
key: train_fscore
|
|
value: [0.9 0.93706294 0.91428571 0.90140845 0.92086331 0.92198582
|
|
0.88111888 0.93055556 0.90140845 0.88111888]
|
|
|
|
mean value: 0.9089807995505774
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 0.71428571 0.75 0.66666667 0.77777778
|
|
0.88888889 0.63636364 1. 0.77777778]
|
|
|
|
mean value: 0.7878427128427128
|
|
|
|
key: train_precision
|
|
value: [0.92647059 0.94366197 0.94117647 0.91428571 0.95522388 0.94202899
|
|
0.88732394 0.93055556 0.91428571 0.88732394]
|
|
|
|
mean value: 0.9242336768209705
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.625 0.75 0.5 0.875 1. 0.875 1. 0.875]
|
|
|
|
mean value: 0.775
|
|
|
|
key: train_recall
|
|
value: [0.875 0.93055556 0.88888889 0.88888889 0.88888889 0.90277778
|
|
0.875 0.93055556 0.88888889 0.875 ]
|
|
|
|
mean value: 0.8944444444444444
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.625 0.6875 0.75 0.625 0.8125 0.9375 0.6875 1. 0.8125]
|
|
|
|
mean value: 0.78125
|
|
|
|
key: train_roc_auc
|
|
value: [0.90277778 0.9375 0.91666667 0.90277778 0.92361111 0.92361111
|
|
0.88194444 0.93055556 0.90277778 0.88194444]
|
|
|
|
mean value: 0.9104166666666667
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.4 0.5 0.6 0.4 0.7
|
|
0.88888889 0.58333333 1. 0.7 ]
|
|
|
|
mean value: 0.6522222222222223
|
|
|
|
key: train_jcc
|
|
value: [0.81818182 0.88157895 0.84210526 0.82051282 0.85333333 0.85526316
|
|
0.7875 0.87012987 0.82051282 0.7875 ]
|
|
|
|
mean value: 0.8336618031091715
|
|
|
|
MCC on Blind test: 0.53
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.72806501 0.81196308 0.72713518 0.71018243 0.81921387 0.72717166
|
|
0.72666717 0.81226301 0.71975946 0.69790673]
|
|
|
|
mean value: 0.7480327606201171
|
|
|
|
key: score_time
|
|
value: [0.01461482 0.01218319 0.01204562 0.01194263 0.01200271 0.01505589
|
|
0.01505637 0.0150106 0.01517773 0.01203251]
|
|
|
|
mean value: 0.013512206077575684
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.25 0.37796447 0.5 0.25819889 0.25819889
|
|
0.75 0.57735027 1. 0.62994079]
|
|
|
|
mean value: 0.5483570413730084
|
|
|
|
key: train_mcc
|
|
value: [0.86244307 0.83365502 0.76396258 0.63913552 0.87711752 0.98620624
|
|
1. 1. 0.86144352 0.86244307]
|
|
|
|
mean value: 0.86864065332072
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.625 0.6875 0.75 0.625 0.625 0.875 0.75 1. 0.8125]
|
|
|
|
mean value: 0.76875
|
|
|
|
key: train_accuracy
|
|
value: [0.93055556 0.91666667 0.88194444 0.81944444 0.9375 0.99305556
|
|
1. 1. 0.93055556 0.93055556]
|
|
|
|
mean value: 0.9340277777777778
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.625 0.66666667 0.75 0.57142857 0.66666667
|
|
0.875 0.8 1. 0.82352941]
|
|
|
|
mean value: 0.7711624649859944
|
|
|
|
key: train_fscore
|
|
value: [0.92857143 0.91780822 0.88111888 0.81690141 0.9352518 0.99310345
|
|
1. 1. 0.92957746 0.92857143]
|
|
|
|
mean value: 0.933090407751627
|
|
|
|
key: test_precision
|
|
value: [1. 0.625 0.71428571 0.75 0.66666667 0.6
|
|
0.875 0.66666667 1. 0.77777778]
|
|
|
|
mean value: 0.7675396825396825
|
|
|
|
key: train_precision
|
|
value: [0.95588235 0.90540541 0.88732394 0.82857143 0.97014925 0.98630137
|
|
1. 1. 0.94285714 0.95588235]
|
|
|
|
mean value: 0.9432373249972659
|
|
|
|
key: test_recall
|
|
value: [0.875 0.625 0.625 0.75 0.5 0.75 0.875 1. 1. 0.875]
|
|
|
|
mean value: 0.7875
|
|
|
|
key: train_recall
|
|
value: [0.90277778 0.93055556 0.875 0.80555556 0.90277778 1.
|
|
1. 1. 0.91666667 0.90277778]
|
|
|
|
mean value: 0.9236111111111112
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.625 0.6875 0.75 0.625 0.625 0.875 0.75 1. 0.8125]
|
|
|
|
mean value: 0.76875
|
|
|
|
key: train_roc_auc
|
|
value: [0.93055556 0.91666667 0.88194444 0.81944444 0.9375 0.99305556
|
|
1. 1. 0.93055556 0.93055556]
|
|
|
|
mean value: 0.9340277777777778
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.45454545 0.5 0.6 0.4 0.5
|
|
0.77777778 0.66666667 1. 0.7 ]
|
|
|
|
mean value: 0.6473989898989899
|
|
|
|
key: train_jcc
|
|
value: [0.86666667 0.84810127 0.7875 0.69047619 0.87837838 0.98630137
|
|
1. 1. 0.86842105 0.86666667]
|
|
|
|
mean value: 0.879251159050528
|
|
|
|
MCC on Blind test: 0.54
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01242161 0.01170826 0.01003098 0.00964308 0.00956821 0.00975442
|
|
0.00983524 0.01015282 0.0098424 0.00980234]
|
|
|
|
mean value: 0.010275936126708985
|
|
|
|
key: score_time
|
|
value: [0.01263571 0.00957966 0.00948811 0.00938511 0.00946617 0.00960708
|
|
0.00958157 0.00948143 0.008816 0.00962996]
|
|
|
|
mean value: 0.00976707935333252
|
|
|
|
key: test_mcc
|
|
value: [0.37796447 0.13483997 0.40451992 0.5 0.13483997 0.25
|
|
0.62994079 0.12598816 0.62994079 0.28867513]
|
|
|
|
mean value: 0.3476709204434449
|
|
|
|
key: train_mcc
|
|
value: [0.54131274 0.71393289 0.6761234 0.66742244 0.60720171 0.60194342
|
|
0.6479516 0.50074777 0.62300202 0.55742733]
|
|
|
|
mean value: 0.6137065323033065
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.5625 0.6875 0.75 0.5625 0.625 0.8125 0.5625 0.8125 0.625 ]
|
|
|
|
mean value: 0.6625
|
|
|
|
key: train_accuracy
|
|
value: [0.76388889 0.85416667 0.83333333 0.82638889 0.79861111 0.79861111
|
|
0.81944444 0.72916667 0.80555556 0.77083333]
|
|
|
|
mean value: 0.8
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.46153846 0.61538462 0.75 0.46153846 0.625
|
|
0.8 0.58823529 0.82352941 0.5 ]
|
|
|
|
mean value: 0.6025226244343891
|
|
|
|
key: train_fscore
|
|
value: [0.734375 0.84444444 0.81818182 0.80620155 0.77862595 0.78518519
|
|
0.8030303 0.66086957 0.78461538 0.74015748]
|
|
|
|
mean value: 0.7755686685575558
|
|
|
|
key: test_precision
|
|
value: [1. 0.6 0.8 0.75 0.6 0.625
|
|
0.85714286 0.55555556 0.77777778 0.75 ]
|
|
|
|
mean value: 0.731547619047619
|
|
|
|
key: train_precision
|
|
value: [0.83928571 0.9047619 0.9 0.9122807 0.86440678 0.84126984
|
|
0.88333333 0.88372093 0.87931034 0.85454545]
|
|
|
|
mean value: 0.8762915004671795
|
|
|
|
key: test_recall
|
|
value: [0.25 0.375 0.5 0.75 0.375 0.625 0.75 0.625 0.875 0.375]
|
|
|
|
mean value: 0.55
|
|
|
|
key: train_recall
|
|
value: [0.65277778 0.79166667 0.75 0.72222222 0.70833333 0.73611111
|
|
0.73611111 0.52777778 0.70833333 0.65277778]
|
|
|
|
mean value: 0.6986111111111111
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.5625 0.6875 0.75 0.5625 0.625 0.8125 0.5625 0.8125 0.625 ]
|
|
|
|
mean value: 0.6625
|
|
|
|
key: train_roc_auc
|
|
value: [0.76388889 0.85416667 0.83333333 0.82638889 0.79861111 0.79861111
|
|
0.81944444 0.72916667 0.80555556 0.77083333]
|
|
|
|
mean value: 0.8
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.3 0.44444444 0.6 0.3 0.45454545
|
|
0.66666667 0.41666667 0.7 0.33333333]
|
|
|
|
mean value: 0.44656565656565655
|
|
|
|
key: train_jcc
|
|
value: [0.58024691 0.73076923 0.69230769 0.67532468 0.6375 0.64634146
|
|
0.67088608 0.49350649 0.64556962 0.5875 ]
|
|
|
|
mean value: 0.6359952165105505
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01015425 0.01005602 0.00992703 0.00919414 0.01004195 0.0100224
|
|
0.01005316 0.01010346 0.01003551 0.01018643]
|
|
|
|
mean value: 0.009977436065673828
|
|
|
|
key: score_time
|
|
value: [0.00961185 0.00959659 0.00884247 0.00910234 0.00959587 0.00961995
|
|
0.00957131 0.00957799 0.00952578 0.00952721]
|
|
|
|
mean value: 0.009457135200500488
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0. 0.12598816 0.25819889 0.25819889 0.25819889
|
|
0. 0.25819889 0.77459667 0.40451992]
|
|
|
|
mean value: 0.29678410917265274
|
|
|
|
key: train_mcc
|
|
value: [0.50019301 0.55641488 0.54810106 0.50019301 0.52777778 0.48728751
|
|
0.54424492 0.55641488 0.51633492 0.47240451]
|
|
|
|
mean value: 0.5209366498172819
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.5 0.5625 0.625 0.625 0.625 0.5 0.625 0.875 0.6875]
|
|
|
|
mean value: 0.64375
|
|
|
|
key: train_accuracy
|
|
value: [0.75 0.77777778 0.77083333 0.75 0.76388889 0.74305556
|
|
0.77083333 0.77777778 0.75694444 0.73611111]
|
|
|
|
mean value: 0.7597222222222222
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.42857143 0.53333333 0.57142857 0.66666667 0.66666667
|
|
0.42857143 0.66666667 0.88888889 0.61538462]
|
|
|
|
mean value: 0.6289707677942972
|
|
|
|
key: train_fscore
|
|
value: [0.75342466 0.78378378 0.78709677 0.75342466 0.76388889 0.75167785
|
|
0.78145695 0.78378378 0.76821192 0.73972603]
|
|
|
|
mean value: 0.7666475299636937
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.5 0.57142857 0.66666667 0.6 0.6
|
|
0.5 0.6 0.8 0.8 ]
|
|
|
|
mean value: 0.6415873015873016
|
|
|
|
key: train_precision
|
|
value: [0.74324324 0.76315789 0.73493976 0.74324324 0.76388889 0.72727273
|
|
0.74683544 0.76315789 0.73417722 0.72972973]
|
|
|
|
mean value: 0.744964603911551
|
|
|
|
key: test_recall
|
|
value: [0.875 0.375 0.5 0.5 0.75 0.75 0.375 0.75 1. 0.5 ]
|
|
|
|
mean value: 0.6375
|
|
|
|
key: train_recall
|
|
value: [0.76388889 0.80555556 0.84722222 0.76388889 0.76388889 0.77777778
|
|
0.81944444 0.80555556 0.80555556 0.75 ]
|
|
|
|
mean value: 0.7902777777777777
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.5 0.5625 0.625 0.625 0.625 0.5 0.625 0.875 0.6875]
|
|
|
|
mean value: 0.64375
|
|
|
|
key: train_roc_auc
|
|
value: [0.75 0.77777778 0.77083333 0.75 0.76388889 0.74305556
|
|
0.77083333 0.77777778 0.75694444 0.73611111]
|
|
|
|
mean value: 0.7597222222222222
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.27272727 0.36363636 0.4 0.5 0.5
|
|
0.27272727 0.5 0.8 0.44444444]
|
|
|
|
mean value: 0.47535353535353536
|
|
|
|
key: train_jcc
|
|
value: [0.6043956 0.64444444 0.64893617 0.6043956 0.61797753 0.60215054
|
|
0.64130435 0.64444444 0.62365591 0.58695652]
|
|
|
|
mean value: 0.6218661117160872
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00903463 0.00939488 0.00964975 0.00969124 0.00861764 0.0087409
|
|
0.00968075 0.00975513 0.00877261 0.00859642]
|
|
|
|
mean value: 0.00919339656829834
|
|
|
|
key: score_time
|
|
value: [0.0104301 0.01057196 0.01056194 0.01056337 0.0098033 0.00982547
|
|
0.01053381 0.01050091 0.00976157 0.00989175]
|
|
|
|
mean value: 0.010244417190551757
|
|
|
|
key: test_mcc
|
|
value: [ 0.25819889 0.28867513 0.40451992 0.25819889 -0.40451992 0.37796447
|
|
-0.12598816 0.40451992 0.12598816 0.57735027]
|
|
|
|
mean value: 0.21649075737659335
|
|
|
|
key: train_mcc
|
|
value: [0.55071783 0.66053205 0.51999331 0.57895977 0.59829303 0.51795131
|
|
0.55381862 0.56980288 0.55381862 0.49701671]
|
|
|
|
mean value: 0.5600904139114509
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.625 0.6875 0.625 0.3125 0.6875 0.4375 0.6875 0.5625 0.75 ]
|
|
|
|
mean value: 0.6
|
|
|
|
key: train_accuracy
|
|
value: [0.77083333 0.82638889 0.75694444 0.78472222 0.79166667 0.75694444
|
|
0.77083333 0.77777778 0.77083333 0.74305556]
|
|
|
|
mean value: 0.775
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.5 0.61538462 0.57142857 0.15384615 0.66666667
|
|
0.4 0.61538462 0.53333333 0.8 ]
|
|
|
|
mean value: 0.5427472527472528
|
|
|
|
key: train_fscore
|
|
value: [0.7480916 0.81203008 0.73684211 0.76335878 0.765625 0.74074074
|
|
0.74418605 0.75 0.74418605 0.71317829]
|
|
|
|
mean value: 0.7518238690468156
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.75 0.8 0.66666667 0.2 0.71428571
|
|
0.42857143 0.8 0.57142857 0.66666667]
|
|
|
|
mean value: 0.6264285714285714
|
|
|
|
key: train_precision
|
|
value: [0.83050847 0.8852459 0.80327869 0.84745763 0.875 0.79365079
|
|
0.84210526 0.85714286 0.84210526 0.80701754]
|
|
|
|
mean value: 0.8383512412827939
|
|
|
|
key: test_recall
|
|
value: [0.5 0.375 0.5 0.5 0.125 0.625 0.375 0.5 0.5 1. ]
|
|
|
|
mean value: 0.5
|
|
|
|
key: train_recall
|
|
value: [0.68055556 0.75 0.68055556 0.69444444 0.68055556 0.69444444
|
|
0.66666667 0.66666667 0.66666667 0.63888889]
|
|
|
|
mean value: 0.6819444444444445
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.625 0.6875 0.625 0.3125 0.6875 0.4375 0.6875 0.5625 0.75 ]
|
|
|
|
mean value: 0.6
|
|
|
|
key: train_roc_auc
|
|
value: [0.77083333 0.82638889 0.75694444 0.78472222 0.79166667 0.75694444
|
|
0.77083333 0.77777778 0.77083333 0.74305556]
|
|
|
|
mean value: 0.775
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.33333333 0.44444444 0.4 0.08333333 0.5
|
|
0.25 0.44444444 0.36363636 0.66666667]
|
|
|
|
mean value: 0.3885858585858586
|
|
|
|
key: train_jcc
|
|
value: [0.59756098 0.6835443 0.58333333 0.61728395 0.62025316 0.58823529
|
|
0.59259259 0.6 0.59259259 0.55421687]
|
|
|
|
mean value: 0.6029613074687515
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.76
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01255941 0.01184821 0.01181221 0.01090288 0.01072288 0.01183367
|
|
0.01193118 0.01202035 0.01188517 0.01202416]
|
|
|
|
mean value: 0.011754012107849121
|
|
|
|
key: score_time
|
|
value: [0.01044011 0.01010585 0.0101099 0.01012731 0.00992703 0.01021481
|
|
0.01026154 0.01019096 0.01012397 0.01021457]
|
|
|
|
mean value: 0.010171604156494141
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.25819889 0.25819889 0.25 0.37796447 0.37796447
|
|
0.77459667 0.40451992 0.8819171 0.67419986]
|
|
|
|
mean value: 0.5139477382071841
|
|
|
|
key: train_mcc
|
|
value: [0.72333935 0.77777778 0.75116009 0.76455285 0.78262379 0.76573752
|
|
0.73961489 0.76573752 0.76573752 0.73675093]
|
|
|
|
mean value: 0.75730322338289
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.625 0.625 0.625 0.6875 0.6875 0.875 0.6875 0.9375 0.8125]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_accuracy
|
|
value: [0.86111111 0.88888889 0.875 0.88194444 0.88888889 0.88194444
|
|
0.86805556 0.88194444 0.88194444 0.86805556]
|
|
|
|
mean value: 0.8777777777777778
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.57142857 0.57142857 0.625 0.66666667 0.70588235
|
|
0.88888889 0.73684211 0.93333333 0.84210526]
|
|
|
|
mean value: 0.7474909086441595
|
|
|
|
key: train_fscore
|
|
value: [0.85714286 0.88888889 0.87142857 0.87943262 0.88235294 0.87769784
|
|
0.86131387 0.87769784 0.87769784 0.86524823]
|
|
|
|
mean value: 0.8738901503493612
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 0.66666667 0.625 0.71428571 0.66666667
|
|
0.8 0.63636364 1. 0.72727273]
|
|
|
|
mean value: 0.7502922077922078
|
|
|
|
key: train_precision
|
|
value: [0.88235294 0.88888889 0.89705882 0.89855072 0.9375 0.91044776
|
|
0.90769231 0.91044776 0.91044776 0.88405797]
|
|
|
|
mean value: 0.9027444940521342
|
|
|
|
key: test_recall
|
|
value: [0.875 0.5 0.5 0.625 0.625 0.75 1. 0.875 0.875 1. ]
|
|
|
|
mean value: 0.7625
|
|
|
|
key: train_recall
|
|
value: [0.83333333 0.88888889 0.84722222 0.86111111 0.83333333 0.84722222
|
|
0.81944444 0.84722222 0.84722222 0.84722222]
|
|
|
|
mean value: 0.8472222222222222
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.625 0.625 0.625 0.6875 0.6875 0.875 0.6875 0.9375 0.8125]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_roc_auc
|
|
value: [0.86111111 0.88888889 0.875 0.88194444 0.88888889 0.88194444
|
|
0.86805556 0.88194444 0.88194444 0.86805556]
|
|
|
|
mean value: 0.8777777777777778
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.4 0.4 0.45454545 0.5 0.54545455
|
|
0.8 0.58333333 0.875 0.72727273]
|
|
|
|
mean value: 0.6160606060606061
|
|
|
|
key: train_jcc
|
|
value: [0.75 0.8 0.7721519 0.78481013 0.78947368 0.78205128
|
|
0.75641026 0.78205128 0.78205128 0.7625 ]
|
|
|
|
mean value: 0.7761499812091085
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.71846819 0.62230611 0.61704159 0.60863018 0.77716041 0.61148214
|
|
0.608953 0.76959944 0.61665225 0.63989425]
|
|
|
|
mean value: 0.6590187549591064
|
|
|
|
key: score_time
|
|
value: [0.01350093 0.01299119 0.0122745 0.02449679 0.01511884 0.01508713
|
|
0.01227355 0.01241088 0.01220536 0.0151062 ]
|
|
|
|
mean value: 0.014546537399291992
|
|
|
|
key: test_mcc
|
|
value: [0.40451992 0. 0.25819889 0.5 0.25819889 0.12598816
|
|
0.62994079 0.57735027 0.5 0.40451992]
|
|
|
|
mean value: 0.3658716829658293
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.5 0.625 0.75 0.625 0.5625 0.8125 0.75 0.75 0.6875]
|
|
|
|
mean value: 0.675
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.61538462 0.42857143 0.57142857 0.75 0.57142857 0.58823529
|
|
0.8 0.8 0.75 0.73684211]
|
|
|
|
mean value: 0.6611890586193991
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8 0.5 0.66666667 0.75 0.66666667 0.55555556
|
|
0.85714286 0.66666667 0.75 0.63636364]
|
|
|
|
mean value: 0.6849062049062049
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.5 0.375 0.5 0.75 0.5 0.625 0.75 1. 0.75 0.875]
|
|
|
|
mean value: 0.6625
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.5 0.625 0.75 0.625 0.5625 0.8125 0.75 0.75 0.6875]
|
|
|
|
mean value: 0.675
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.44444444 0.27272727 0.4 0.6 0.4 0.41666667
|
|
0.66666667 0.66666667 0.6 0.58333333]
|
|
|
|
mean value: 0.5050505050505051
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0184567 0.01544476 0.01343727 0.01341653 0.01299381 0.01349688
|
|
0.0131917 0.01273799 0.01344085 0.01259589]
|
|
|
|
mean value: 0.013921236991882325
|
|
|
|
key: score_time
|
|
value: [0.01186252 0.00999475 0.00957751 0.00952244 0.00952959 0.00889778
|
|
0.00953364 0.0087533 0.0088768 0.00969267]
|
|
|
|
mean value: 0.009624099731445313
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.67419986 0.62994079 0.75 0.40451992 0.5
|
|
1. 0.37796447 0.8819171 0.62994079]
|
|
|
|
mean value: 0.6478423721684747
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.8125 0.8125 0.875 0.6875 0.75 1. 0.6875 0.9375 0.8125]
|
|
|
|
mean value: 0.81875
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.84210526 0.8 0.875 0.61538462 0.75
|
|
1. 0.70588235 0.93333333 0.82352941]
|
|
|
|
mean value: 0.8145234976581726
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.72727273 0.85714286 0.875 0.8 0.75
|
|
1. 0.66666667 1. 0.77777778]
|
|
|
|
mean value: 0.8311002886002886
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 0.75 0.875 0.5 0.75 1. 0.75 0.875 0.875]
|
|
|
|
mean value: 0.8125
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.8125 0.8125 0.875 0.6875 0.75 1. 0.6875 0.9375 0.8125]
|
|
|
|
mean value: 0.81875
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.72727273 0.66666667 0.77777778 0.44444444 0.6
|
|
1. 0.54545455 0.875 0.7 ]
|
|
|
|
mean value: 0.7003282828282829
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.54
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09847045 0.09635401 0.09673977 0.09675288 0.0990653 0.09674096
|
|
0.09746361 0.09468126 0.09846497 0.09865212]
|
|
|
|
mean value: 0.09733853340148926
|
|
|
|
key: score_time
|
|
value: [0.01907158 0.01847291 0.01872921 0.01913571 0.01924872 0.01915073
|
|
0.01923704 0.01855826 0.01923871 0.01950359]
|
|
|
|
mean value: 0.019034647941589357
|
|
|
|
key: test_mcc
|
|
value: [0.77459667 0.12598816 0.5 0.5 0.37796447 0.5
|
|
0.77459667 0.51639778 0.62994079 0.62994079]
|
|
|
|
mean value: 0.5329425325353683
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.5625 0.75 0.75 0.6875 0.75 0.875 0.75 0.8125 0.8125]
|
|
|
|
mean value: 0.7625
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.53333333 0.75 0.75 0.66666667 0.75
|
|
0.88888889 0.77777778 0.8 0.82352941]
|
|
|
|
mean value: 0.7597338935574229
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.57142857 0.75 0.75 0.71428571 0.75
|
|
0.8 0.7 0.85714286 0.77777778]
|
|
|
|
mean value: 0.767063492063492
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.75 0.75 0.625 0.75 1. 0.875 0.75 0.875]
|
|
|
|
mean value: 0.7625
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.5625 0.75 0.75 0.6875 0.75 0.875 0.75 0.8125 0.8125]
|
|
|
|
mean value: 0.7625
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.36363636 0.6 0.6 0.5 0.6
|
|
0.8 0.63636364 0.66666667 0.7 ]
|
|
|
|
mean value: 0.6216666666666667
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01027179 0.0096848 0.01019812 0.00993538 0.00993776 0.00999069
|
|
0.00986218 0.00992012 0.01005292 0.00992417]
|
|
|
|
mean value: 0.00997779369354248
|
|
|
|
key: score_time
|
|
value: [0.01037312 0.00979018 0.0094502 0.0092504 0.00945425 0.00944686
|
|
0.00948811 0.00944257 0.00942183 0.00938606]
|
|
|
|
mean value: 0.009550356864929199
|
|
|
|
key: test_mcc
|
|
value: [0.40451992 0.5 0.37796447 0. 0.13483997 0.40451992
|
|
0.12598816 0. 0. 0.12598816]
|
|
|
|
mean value: 0.20738205957972508
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.75 0.6875 0.5 0.5625 0.6875 0.5625 0.5 0.5 0.5625]
|
|
|
|
mean value: 0.6
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.61538462 0.75 0.70588235 0.5 0.63157895 0.61538462
|
|
0.53333333 0.5 0.55555556 0.58823529]
|
|
|
|
mean value: 0.5995354714085365
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8 0.75 0.66666667 0.5 0.54545455 0.8
|
|
0.57142857 0.5 0.5 0.55555556]
|
|
|
|
mean value: 0.6189105339105339
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.5 0.75 0.75 0.5 0.75 0.5 0.5 0.5 0.625 0.625]
|
|
|
|
mean value: 0.6
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.75 0.6875 0.5 0.5625 0.6875 0.5625 0.5 0.5 0.5625]
|
|
|
|
mean value: 0.6
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.44444444 0.6 0.54545455 0.33333333 0.46153846 0.44444444
|
|
0.36363636 0.33333333 0.38461538 0.41666667]
|
|
|
|
mean value: 0.4327466977466977
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.34
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.23453951 1.1755209 1.14849567 1.15972948 1.16841793 1.18396425
|
|
1.17039299 1.18397331 1.18276644 1.20701766]
|
|
|
|
mean value: 1.1814818143844605
|
|
|
|
key: score_time
|
|
value: [0.09738564 0.09694934 0.0897789 0.09487081 0.09689403 0.09542942
|
|
0.09747982 0.09665608 0.09580231 0.0981226 ]
|
|
|
|
mean value: 0.09593689441680908
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.37796447 0.62994079 0.8819171 0.25 0.5
|
|
0.8819171 0.40451992 0.77459667 0.62994079]
|
|
|
|
mean value: 0.621271394749067
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.6875 0.8125 0.9375 0.625 0.75 0.9375 0.6875 0.875 0.8125]
|
|
|
|
mean value: 0.80625
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.70588235 0.8 0.93333333 0.625 0.75
|
|
0.94117647 0.73684211 0.85714286 0.82352941]
|
|
|
|
mean value: 0.81062398643668
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 0.85714286 1. 0.625 0.75
|
|
0.88888889 0.63636364 1. 0.77777778]
|
|
|
|
mean value: 0.8201839826839826
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.75 0.75 0.875 0.625 0.75 1. 0.875 0.75 0.875]
|
|
|
|
mean value: 0.8125
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.6875 0.8125 0.9375 0.625 0.75 0.9375 0.6875 0.875 0.8125]
|
|
|
|
mean value: 0.80625
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.54545455 0.66666667 0.875 0.45454545 0.6
|
|
0.88888889 0.58333333 0.75 0.7 ]
|
|
|
|
mean value: 0.6938888888888889
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.58
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Random Forest2
|
|
Model func: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.80555797 0.84374928 0.85162091 0.8727107 0.86085701 0.90116239
|
|
0.89841175 0.89694309 0.8769362 0.8891139 ]
|
|
|
|
mean value: 0.8697063207626343
|
|
|
|
key: score_time
|
|
value: [0.23004627 0.18282223 0.18825459 0.16032267 0.22532058 0.23189497
|
|
0.22093725 0.23716879 0.24110723 0.19134307]
|
|
|
|
mean value: 0.2109217643737793
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.12598816 0.51639778 0.75 0.51639778 0.5
|
|
0.77459667 0.62994079 0.77459667 0.62994079]
|
|
|
|
mean value: 0.6099775735526974
|
|
|
|
key: train_mcc
|
|
value: [0.97259753 0.93280752 0.98620624 0.97259753 0.97259753 0.94480902
|
|
0.9591663 0.9459053 0.97259753 0.98620624]
|
|
|
|
mean value: 0.964549072487303
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.5625 0.75 0.875 0.75 0.75 0.875 0.8125 0.875 0.8125]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_accuracy
|
|
value: [0.98611111 0.96527778 0.99305556 0.98611111 0.98611111 0.97222222
|
|
0.97916667 0.97222222 0.98611111 0.99305556]
|
|
|
|
mean value: 0.9819444444444445
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.53333333 0.71428571 0.875 0.71428571 0.75
|
|
0.88888889 0.82352941 0.85714286 0.82352941]
|
|
|
|
mean value: 0.7913328664799253
|
|
|
|
key: train_fscore
|
|
value: [0.98591549 0.96402878 0.99300699 0.98591549 0.98591549 0.97183099
|
|
0.9787234 0.97142857 0.98591549 0.99300699]
|
|
|
|
mean value: 0.9815687696422772
|
|
|
|
key: test_precision
|
|
value: [1. 0.57142857 0.83333333 0.875 0.83333333 0.75
|
|
0.8 0.77777778 1. 0.77777778]
|
|
|
|
mean value: 0.8218650793650794
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 0.98571429
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9985714285714286
|
|
|
|
key: test_recall
|
|
value: [0.875 0.5 0.625 0.875 0.625 0.75 1. 0.875 0.75 0.875]
|
|
|
|
mean value: 0.775
|
|
|
|
key: train_recall
|
|
value: [0.97222222 0.93055556 0.98611111 0.97222222 0.97222222 0.95833333
|
|
0.95833333 0.94444444 0.97222222 0.98611111]
|
|
|
|
mean value: 0.9652777777777778
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.5625 0.75 0.875 0.75 0.75 0.875 0.8125 0.875 0.8125]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_roc_auc
|
|
value: [0.98611111 0.96527778 0.99305556 0.98611111 0.98611111 0.97222222
|
|
0.97916667 0.97222222 0.98611111 0.99305556]
|
|
|
|
mean value: 0.9819444444444445
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.36363636 0.55555556 0.77777778 0.55555556 0.6
|
|
0.8 0.7 0.75 0.7 ]
|
|
|
|
mean value: 0.6677525252525253
|
|
|
|
key: train_jcc
|
|
value: [0.97222222 0.93055556 0.98611111 0.97222222 0.97222222 0.94520548
|
|
0.95833333 0.94444444 0.97222222 0.98611111]
|
|
|
|
mean value: 0.9639649923896499
|
|
|
|
MCC on Blind test: 0.56
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02202415 0.00936294 0.00939274 0.00888824 0.00935483 0.00897217
|
|
0.00924826 0.00890183 0.00893831 0.00897026]
|
|
|
|
mean value: 0.010405373573303223
|
|
|
|
key: score_time
|
|
value: [0.01327825 0.00917912 0.0093596 0.00863504 0.00918007 0.00874853
|
|
0.00862384 0.00864148 0.00872564 0.00902987]
|
|
|
|
mean value: 0.009340143203735352
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0. 0.12598816 0.25819889 0.25819889 0.25819889
|
|
0. 0.25819889 0.77459667 0.40451992]
|
|
|
|
mean value: 0.29678410917265274
|
|
|
|
key: train_mcc
|
|
value: [0.50019301 0.55641488 0.54810106 0.50019301 0.52777778 0.48728751
|
|
0.54424492 0.55641488 0.51633492 0.47240451]
|
|
|
|
mean value: 0.5209366498172819
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.5 0.5625 0.625 0.625 0.625 0.5 0.625 0.875 0.6875]
|
|
|
|
mean value: 0.64375
|
|
|
|
key: train_accuracy
|
|
value: [0.75 0.77777778 0.77083333 0.75 0.76388889 0.74305556
|
|
0.77083333 0.77777778 0.75694444 0.73611111]
|
|
|
|
mean value: 0.7597222222222222
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.42857143 0.53333333 0.57142857 0.66666667 0.66666667
|
|
0.42857143 0.66666667 0.88888889 0.61538462]
|
|
|
|
mean value: 0.6289707677942972
|
|
|
|
key: train_fscore
|
|
value: [0.75342466 0.78378378 0.78709677 0.75342466 0.76388889 0.75167785
|
|
0.78145695 0.78378378 0.76821192 0.73972603]
|
|
|
|
mean value: 0.7666475299636937
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.5 0.57142857 0.66666667 0.6 0.6
|
|
0.5 0.6 0.8 0.8 ]
|
|
|
|
mean value: 0.6415873015873016
|
|
|
|
key: train_precision
|
|
value: [0.74324324 0.76315789 0.73493976 0.74324324 0.76388889 0.72727273
|
|
0.74683544 0.76315789 0.73417722 0.72972973]
|
|
|
|
mean value: 0.744964603911551
|
|
|
|
key: test_recall
|
|
value: [0.875 0.375 0.5 0.5 0.75 0.75 0.375 0.75 1. 0.5 ]
|
|
|
|
mean value: 0.6375
|
|
|
|
key: train_recall
|
|
value: [0.76388889 0.80555556 0.84722222 0.76388889 0.76388889 0.77777778
|
|
0.81944444 0.80555556 0.80555556 0.75 ]
|
|
|
|
mean value: 0.7902777777777777
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.5 0.5625 0.625 0.625 0.625 0.5 0.625 0.875 0.6875]
|
|
|
|
mean value: 0.64375
|
|
|
|
key: train_roc_auc
|
|
value: [0.75 0.77777778 0.77083333 0.75 0.76388889 0.74305556
|
|
0.77083333 0.77777778 0.75694444 0.73611111]
|
|
|
|
mean value: 0.7597222222222222
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.27272727 0.36363636 0.4 0.5 0.5
|
|
0.27272727 0.5 0.8 0.44444444]
|
|
|
|
mean value: 0.47535353535353536
|
|
|
|
key: train_jcc
|
|
value: [0.6043956 0.64444444 0.64893617 0.6043956 0.61797753 0.60215054
|
|
0.64130435 0.64444444 0.62365591 0.58695652]
|
|
|
|
mean value: 0.6218661117160872
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.0698278 0.05860615 0.05591798 0.05624771 0.20103455 0.04858589
|
|
0.04722667 0.04717255 0.0465014 0.04586387]
|
|
|
|
mean value: 0.06769845485687256
|
|
|
|
key: score_time
|
|
value: [0.01165795 0.01121998 0.01152229 0.01119447 0.01186419 0.01077414
|
|
0.01033926 0.01064968 0.01068974 0.0104177 ]
|
|
|
|
mean value: 0.011032938957214355
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.67419986 0.67419986 0.8819171 0.77459667 0.77459667
|
|
0.8819171 0.62994079 1. 0.8819171 ]
|
|
|
|
mean value: 0.805520226651095
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.8125 0.8125 0.9375 0.875 0.875 0.9375 0.8125 1. 0.9375]
|
|
|
|
mean value: 0.89375
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.84210526 0.76923077 0.93333333 0.85714286 0.88888889
|
|
0.93333333 0.82352941 1. 0.93333333]
|
|
|
|
mean value: 0.8914230523518449
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.72727273 1. 1. 1. 0.8
|
|
1. 0.77777778 1. 1. ]
|
|
|
|
mean value: 0.9305050505050505
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 1. 0.625 0.875 0.75 1. 0.875 0.875 1. 0.875]
|
|
|
|
mean value: 0.875
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.8125 0.8125 0.9375 0.875 0.875 0.9375 0.8125 1. 0.9375]
|
|
|
|
mean value: 0.89375
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.72727273 0.625 0.875 0.75 0.8
|
|
0.875 0.7 1. 0.875 ]
|
|
|
|
mean value: 0.8102272727272727
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.72
|
|
|
|
Accuracy on Blind test: 0.92
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02964163 0.04872727 0.05161095 0.04942465 0.04742837 0.04894733
|
|
0.04912353 0.0492754 0.04936814 0.04904819]
|
|
|
|
mean value: 0.04725954532623291
|
|
|
|
key: score_time
|
|
value: [0.02136135 0.0172255 0.01896691 0.01955104 0.02322507 0.02261996
|
|
0.02171683 0.0240128 0.02368546 0.02297282]
|
|
|
|
mean value: 0.021533775329589843
|
|
|
|
key: test_mcc
|
|
value: [0.37796447 0.75 0. 0.51639778 0.57735027 0.37796447
|
|
0.12598816 0.25819889 0.37796447 0.5 ]
|
|
|
|
mean value: 0.3861828515128533
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 0.98620624 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9986206235898977
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.875 0.5 0.75 0.75 0.6875 0.5625 0.625 0.6875 0.75 ]
|
|
|
|
mean value: 0.6875
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 0.99305556 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9993055555555556
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.875 0.42857143 0.71428571 0.66666667 0.70588235
|
|
0.58823529 0.66666667 0.70588235 0.75 ]
|
|
|
|
mean value: 0.6767857142857143
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 0.99310345 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9993103448275862
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.875 0.5 0.83333333 1. 0.66666667
|
|
0.55555556 0.6 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7161507936507936
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 0.98630137 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9986301369863013
|
|
|
|
key: test_recall
|
|
value: [0.625 0.875 0.375 0.625 0.5 0.75 0.625 0.75 0.75 0.75 ]
|
|
|
|
mean value: 0.6625
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.875 0.5 0.75 0.75 0.6875 0.5625 0.625 0.6875 0.75 ]
|
|
|
|
mean value: 0.6875
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 0.99305556 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9993055555555556
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.77777778 0.27272727 0.55555556 0.5 0.54545455
|
|
0.41666667 0.5 0.54545455 0.6 ]
|
|
|
|
mean value: 0.5213636363636364
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 0.98630137 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9986301369863013
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02249289 0.00916409 0.00881982 0.0087142 0.00878906 0.00870824
|
|
0.00863624 0.00872755 0.00871754 0.00863981]
|
|
|
|
mean value: 0.010140943527221679
|
|
|
|
key: score_time
|
|
value: [0.0092752 0.00889182 0.00861144 0.00849223 0.00855494 0.00846267
|
|
0.00859952 0.00850701 0.00850463 0.00861669]
|
|
|
|
mean value: 0.00865161418914795
|
|
|
|
key: test_mcc
|
|
value: [0.67419986 0.25819889 0.37796447 0.5 0.37796447 0.5
|
|
0.77459667 0.28867513 0.8819171 0.77459667]
|
|
|
|
mean value: 0.5408113274994835
|
|
|
|
key: train_mcc
|
|
value: [0.61134702 0.59866751 0.64111887 0.61134702 0.61205637 0.61111111
|
|
0.61134702 0.63888889 0.55577001 0.55641488]
|
|
|
|
mean value: 0.6048068705813204
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.625 0.6875 0.75 0.6875 0.75 0.875 0.625 0.9375 0.875 ]
|
|
|
|
mean value: 0.7625
|
|
|
|
key: train_accuracy
|
|
value: [0.80555556 0.79861111 0.81944444 0.80555556 0.80555556 0.80555556
|
|
0.80555556 0.81944444 0.77777778 0.77777778]
|
|
|
|
mean value: 0.8020833333333334
|
|
|
|
key: test_fscore
|
|
value: [0.76923077 0.57142857 0.66666667 0.75 0.66666667 0.75
|
|
0.88888889 0.7 0.93333333 0.88888889]
|
|
|
|
mean value: 0.7585103785103785
|
|
|
|
key: train_fscore
|
|
value: [0.80821918 0.80536913 0.82666667 0.80821918 0.81081081 0.80555556
|
|
0.8028169 0.81944444 0.78082192 0.78378378]
|
|
|
|
mean value: 0.8051707564159093
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 0.71428571 0.75 0.71428571 0.75
|
|
0.8 0.58333333 1. 0.8 ]
|
|
|
|
mean value: 0.7778571428571429
|
|
|
|
key: train_precision
|
|
value: [0.7972973 0.77922078 0.79487179 0.7972973 0.78947368 0.80555556
|
|
0.81428571 0.81944444 0.77027027 0.76315789]
|
|
|
|
mean value: 0.7930874732190522
|
|
|
|
key: test_recall
|
|
value: [0.625 0.5 0.625 0.75 0.625 0.75 1. 0.875 0.875 1. ]
|
|
|
|
mean value: 0.7625
|
|
|
|
key: train_recall
|
|
value: [0.81944444 0.83333333 0.86111111 0.81944444 0.83333333 0.80555556
|
|
0.79166667 0.81944444 0.79166667 0.80555556]
|
|
|
|
mean value: 0.8180555555555555
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.625 0.6875 0.75 0.6875 0.75 0.875 0.625 0.9375 0.875 ]
|
|
|
|
mean value: 0.7625
|
|
|
|
key: train_roc_auc
|
|
value: [0.80555556 0.79861111 0.81944444 0.80555556 0.80555556 0.80555556
|
|
0.80555556 0.81944444 0.77777778 0.77777778]
|
|
|
|
mean value: 0.8020833333333333
|
|
|
|
key: test_jcc
|
|
value: [0.625 0.4 0.5 0.6 0.5 0.6
|
|
0.8 0.53846154 0.875 0.8 ]
|
|
|
|
mean value: 0.6238461538461538
|
|
|
|
key: train_jcc
|
|
value: [0.67816092 0.6741573 0.70454545 0.67816092 0.68181818 0.6744186
|
|
0.67058824 0.69411765 0.64044944 0.64444444]
|
|
|
|
mean value: 0.6740861148465678
|
|
|
|
MCC on Blind test: 0.47
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01058078 0.01430964 0.01485038 0.01497197 0.01408148 0.01484871
|
|
0.0134542 0.01463318 0.01502848 0.01584578]
|
|
|
|
mean value: 0.014260458946228027
|
|
|
|
key: score_time
|
|
value: [0.00895619 0.0115304 0.01166797 0.01158357 0.01168847 0.01154566
|
|
0.01149583 0.01192141 0.01182699 0.01205111]
|
|
|
|
mean value: 0.011426758766174317
|
|
|
|
key: test_mcc
|
|
value: [0.77459667 0.5 0.37796447 0.37796447 0.16012815 0.37796447
|
|
0.67419986 0.48038446 0.57735027 0.25819889]
|
|
|
|
mean value: 0.45587517248895426
|
|
|
|
key: train_mcc
|
|
value: [0.80555556 0.79543488 0.85391974 0.84148907 0.59874082 0.88888889
|
|
0.67419986 0.81343943 0.7526253 0.55595945]
|
|
|
|
mean value: 0.7580252998153613
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.75 0.6875 0.6875 0.5625 0.6875 0.8125 0.6875 0.75 0.5625]
|
|
|
|
mean value: 0.70625
|
|
|
|
key: train_accuracy
|
|
value: [0.90277778 0.89583333 0.92361111 0.91666667 0.76388889 0.94444444
|
|
0.8125 0.90277778 0.86805556 0.73611111]
|
|
|
|
mean value: 0.8666666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.75 0.70588235 0.70588235 0.66666667 0.70588235
|
|
0.76923077 0.76190476 0.8 0.69565217]
|
|
|
|
mean value: 0.7418244287681628
|
|
|
|
key: train_fscore
|
|
value: [0.90277778 0.90066225 0.92810458 0.92207792 0.80898876 0.94444444
|
|
0.76923077 0.90909091 0.88050314 0.79120879]
|
|
|
|
mean value: 0.8757089349348673
|
|
|
|
key: test_precision
|
|
value: [1. 0.75 0.66666667 0.66666667 0.53846154 0.66666667
|
|
1. 0.61538462 0.66666667 0.53333333]
|
|
|
|
mean value: 0.7103846153846154
|
|
|
|
key: train_precision
|
|
value: [0.90277778 0.86075949 0.87654321 0.86585366 0.67924528 0.94444444
|
|
1. 0.85365854 0.8045977 0.65454545]
|
|
|
|
mean value: 0.8442425559605351
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.75 0.75 0.875 0.75 0.625 1. 1. 1. ]
|
|
|
|
mean value: 0.825
|
|
|
|
key: train_recall
|
|
value: [0.90277778 0.94444444 0.98611111 0.98611111 1. 0.94444444
|
|
0.625 0.97222222 0.97222222 1. ]
|
|
|
|
mean value: 0.9333333333333333
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.75 0.6875 0.6875 0.5625 0.6875 0.8125 0.6875 0.75 0.5625]
|
|
|
|
mean value: 0.70625
|
|
|
|
key: train_roc_auc
|
|
value: [0.90277778 0.89583333 0.92361111 0.91666667 0.76388889 0.94444444
|
|
0.8125 0.90277778 0.86805556 0.73611111]
|
|
|
|
mean value: 0.8666666666666667
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.6 0.54545455 0.54545455 0.5 0.54545455
|
|
0.625 0.61538462 0.66666667 0.53333333]
|
|
|
|
mean value: 0.5926748251748252
|
|
|
|
key: train_jcc
|
|
value: [0.82278481 0.81927711 0.86585366 0.85542169 0.67924528 0.89473684
|
|
0.625 0.83333333 0.78651685 0.65454545]
|
|
|
|
mean value: 0.7836715030779394
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01417518 0.01484919 0.01304817 0.01441622 0.01313996 0.01317859
|
|
0.01370192 0.01358104 0.01534057 0.01391077]
|
|
|
|
mean value: 0.013934159278869629
|
|
|
|
key: score_time
|
|
value: [0.01190114 0.01150131 0.01153016 0.01156521 0.01159763 0.01155448
|
|
0.01160812 0.01152849 0.01162577 0.0115304 ]
|
|
|
|
mean value: 0.011594271659851075
|
|
|
|
key: test_mcc
|
|
value: [0.67419986 0.57735027 0.37796447 0.77459667 0.25 0.37796447
|
|
1. 0. 0.48038446 0.67419986]
|
|
|
|
mean value: 0.5186660070791309
|
|
|
|
key: train_mcc
|
|
value: [0.76923389 0.6094494 0.75467294 0.80489529 0.83624201 0.87508441
|
|
0.83333333 0.62017367 0.68511879 0.62017367]
|
|
|
|
mean value: 0.7408377410274369
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.75 0.6875 0.875 0.625 0.6875 1. 0.5 0.6875 0.8125]
|
|
|
|
mean value: 0.74375
|
|
|
|
key: train_accuracy
|
|
value: [0.875 0.77083333 0.875 0.89583333 0.91666667 0.9375
|
|
0.91666667 0.77777778 0.81944444 0.77777778]
|
|
|
|
mean value: 0.85625
|
|
|
|
key: test_fscore
|
|
value: [0.76923077 0.8 0.66666667 0.85714286 0.625 0.70588235
|
|
1. 0.66666667 0.76190476 0.76923077]
|
|
|
|
mean value: 0.7621724843783667
|
|
|
|
key: train_fscore
|
|
value: [0.859375 0.81355932 0.88157895 0.88549618 0.92 0.93706294
|
|
0.91666667 0.81818182 0.84705882 0.71428571]
|
|
|
|
mean value: 0.8593265412334974
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 0.71428571 1. 0.625 0.66666667
|
|
1. 0.5 0.61538462 1. ]
|
|
|
|
mean value: 0.7788003663003663
|
|
|
|
key: train_precision
|
|
value: [0.98214286 0.68571429 0.8375 0.98305085 0.88461538 0.94366197
|
|
0.91666667 0.69230769 0.73469388 1. ]
|
|
|
|
mean value: 0.866035358328652
|
|
|
|
key: test_recall
|
|
value: [0.625 1. 0.625 0.75 0.625 0.75 1. 1. 1. 0.625]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_recall
|
|
value: [0.76388889 1. 0.93055556 0.80555556 0.95833333 0.93055556
|
|
0.91666667 1. 1. 0.55555556]
|
|
|
|
mean value: 0.8861111111111111
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.75 0.6875 0.875 0.625 0.6875 1. 0.5 0.6875 0.8125]
|
|
|
|
mean value: 0.74375
|
|
|
|
key: train_roc_auc
|
|
value: [0.875 0.77083333 0.875 0.89583333 0.91666667 0.9375
|
|
0.91666667 0.77777778 0.81944444 0.77777778]
|
|
|
|
mean value: 0.85625
|
|
|
|
key: test_jcc
|
|
value: [0.625 0.66666667 0.5 0.75 0.45454545 0.54545455
|
|
1. 0.5 0.61538462 0.625 ]
|
|
|
|
mean value: 0.6282051282051282
|
|
|
|
key: train_jcc
|
|
value: [0.75342466 0.68571429 0.78823529 0.79452055 0.85185185 0.88157895
|
|
0.84615385 0.69230769 0.73469388 0.55555556]
|
|
|
|
mean value: 0.7584036556099772
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.11346936 0.10075355 0.10010242 0.10053444 0.1010046 0.10101986
|
|
0.10089087 0.09908795 0.10078716 0.09968233]
|
|
|
|
mean value: 0.10173325538635254
|
|
|
|
key: score_time
|
|
value: [0.0152452 0.01523733 0.0152607 0.01565003 0.01543975 0.01593566
|
|
0.01502085 0.01519608 0.01509285 0.01549172]
|
|
|
|
mean value: 0.015357017517089844
|
|
|
|
key: test_mcc
|
|
value: [0.77459667 0.77459667 0.67419986 0.77459667 0.51639778 0.5
|
|
0.75 0.25819889 0.62994079 0.75 ]
|
|
|
|
mean value: 0.6402527327777887
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.875 0.8125 0.875 0.75 0.75 0.875 0.625 0.8125 0.875 ]
|
|
|
|
mean value: 0.8125
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.88888889 0.76923077 0.85714286 0.71428571 0.75
|
|
0.875 0.66666667 0.82352941 0.875 ]
|
|
|
|
mean value: 0.8076887165122459
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.8 1. 1. 0.83333333 0.75
|
|
0.875 0.6 0.77777778 0.875 ]
|
|
|
|
mean value: 0.8511111111111112
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 0.625 0.75 0.625 0.75 0.875 0.75 0.875 0.875]
|
|
|
|
mean value: 0.7875
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.875 0.8125 0.875 0.75 0.75 0.875 0.625 0.8125 0.875 ]
|
|
|
|
mean value: 0.8125
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.8 0.625 0.75 0.55555556 0.6
|
|
0.77777778 0.5 0.7 0.77777778]
|
|
|
|
mean value: 0.6836111111111112
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03880906 0.03364229 0.05067086 0.03772902 0.03970957 0.0343678
|
|
0.0453105 0.04242063 0.03796387 0.03878975]
|
|
|
|
mean value: 0.03994133472442627
|
|
|
|
key: score_time
|
|
value: [0.01643252 0.02530622 0.03485131 0.01991534 0.02371836 0.01844072
|
|
0.01818657 0.0173986 0.02433991 0.0229857 ]
|
|
|
|
mean value: 0.02215752601623535
|
|
|
|
key: test_mcc
|
|
value: [0.77459667 0.77459667 0.51639778 0.75 0.67419986 0.62994079
|
|
0.8819171 0.37796447 0.8819171 0.8819171 ]
|
|
|
|
mean value: 0.714344755286306
|
|
|
|
key: train_mcc
|
|
value: [0.97222222 1. 0.97259753 0.97259753 0.97222222 1.
|
|
0.98620624 0.98620624 0.98620624 0.98620624]
|
|
|
|
mean value: 0.9834464438358899
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.875 0.75 0.875 0.8125 0.8125 0.9375 0.6875 0.9375 0.9375]
|
|
|
|
mean value: 0.85
|
|
|
|
key: train_accuracy
|
|
value: [0.98611111 1. 0.98611111 0.98611111 0.98611111 1.
|
|
0.99305556 0.99305556 0.99305556 0.99305556]
|
|
|
|
mean value: 0.9916666666666667
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.88888889 0.71428571 0.875 0.76923077 0.8
|
|
0.93333333 0.70588235 0.93333333 0.93333333]
|
|
|
|
mean value: 0.8410430582489407
|
|
|
|
key: train_fscore
|
|
value: [0.98611111 1. 0.98591549 0.98591549 0.98611111 1.
|
|
0.99300699 0.99300699 0.99300699 0.99300699]
|
|
|
|
mean value: 0.9916081180165687
|
|
|
|
key: test_precision
|
|
value: [1. 0.8 0.83333333 0.875 1. 0.85714286
|
|
1. 0.66666667 1. 1. ]
|
|
|
|
mean value: 0.9032142857142857
|
|
|
|
key: train_precision
|
|
value: [0.98611111 1. 1. 1. 0.98611111 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9972222222222222
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 0.625 0.875 0.625 0.75 0.875 0.75 0.875 0.875]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_recall
|
|
value: [0.98611111 1. 0.97222222 0.97222222 0.98611111 1.
|
|
0.98611111 0.98611111 0.98611111 0.98611111]
|
|
|
|
mean value: 0.9861111111111112
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.875 0.75 0.875 0.8125 0.8125 0.9375 0.6875 0.9375 0.9375]
|
|
|
|
mean value: 0.85
|
|
|
|
key: train_roc_auc
|
|
value: [0.98611111 1. 0.98611111 0.98611111 0.98611111 1.
|
|
0.99305556 0.99305556 0.99305556 0.99305556]
|
|
|
|
mean value: 0.9916666666666667
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.8 0.55555556 0.77777778 0.625 0.66666667
|
|
0.875 0.54545455 0.875 0.875 ]
|
|
|
|
mean value: 0.7345454545454545
|
|
|
|
key: train_jcc
|
|
value: [0.97260274 1. 0.97222222 0.97222222 0.97260274 1.
|
|
0.98611111 0.98611111 0.98611111 0.98611111]
|
|
|
|
mean value: 0.9834094368340944
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02955914 0.04669333 0.04862022 0.04951882 0.02089596 0.02093339
|
|
0.02103496 0.04321647 0.04756021 0.03159857]
|
|
|
|
mean value: 0.03596310615539551
|
|
|
|
key: score_time
|
|
value: [0.02113533 0.02523756 0.02448869 0.02571726 0.01315403 0.01300144
|
|
0.01301503 0.02466917 0.02317739 0.01307464]
|
|
|
|
mean value: 0.01966705322265625
|
|
|
|
key: test_mcc
|
|
value: [ 0.40451992 -0.13483997 -0.13483997 0.25819889 0.16012815 0.51639778
|
|
0.25819889 0.40451992 0.25819889 0.37796447]
|
|
|
|
mean value: 0.23684469655207135
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 0.98620624 1.
|
|
1. 0.98620624 1. 1. ]
|
|
|
|
mean value: 0.9972412471797952
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.4375 0.4375 0.625 0.5625 0.75 0.625 0.6875 0.625 0.6875]
|
|
|
|
mean value: 0.6125
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 0.99305556 1.
|
|
1. 0.99305556 1. 1. ]
|
|
|
|
mean value: 0.9986111111111111
|
|
|
|
key: test_fscore
|
|
value: [0.61538462 0.30769231 0.30769231 0.57142857 0.36363636 0.71428571
|
|
0.57142857 0.61538462 0.57142857 0.70588235]
|
|
|
|
mean value: 0.5344243991302815
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 0.99300699 1.
|
|
1. 0.99300699 1. 1. ]
|
|
|
|
mean value: 0.9986013986013986
|
|
|
|
key: test_precision
|
|
value: [0.8 0.4 0.4 0.66666667 0.66666667 0.83333333
|
|
0.66666667 0.8 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6566666666666666
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.5 0.25 0.25 0.5 0.25 0.625 0.5 0.5 0.5 0.75 ]
|
|
|
|
mean value: 0.4625
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 0.98611111 1.
|
|
1. 0.98611111 1. 1. ]
|
|
|
|
mean value: 0.9972222222222222
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.4375 0.4375 0.625 0.5625 0.75 0.625 0.6875 0.625 0.6875]
|
|
|
|
mean value: 0.6125
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 0.99305556 1.
|
|
1. 0.99305556 1. 1. ]
|
|
|
|
mean value: 0.9986111111111111
|
|
|
|
key: test_jcc
|
|
value: [0.44444444 0.18181818 0.18181818 0.4 0.22222222 0.55555556
|
|
0.4 0.44444444 0.4 0.54545455]
|
|
|
|
mean value: 0.37757575757575756
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 0.98611111 1.
|
|
1. 0.98611111 1. 1. ]
|
|
|
|
mean value: 0.9972222222222222
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.30551982 0.29990411 0.29476953 0.28838086 0.2883904 0.28441834
|
|
0.28576183 0.2811265 0.29715872 0.28773928]
|
|
|
|
mean value: 0.29131693840026857
|
|
|
|
key: score_time
|
|
value: [0.01042271 0.01003242 0.01025629 0.01001406 0.00966573 0.00991654
|
|
0.00951052 0.00932312 0.00930119 0.01014185]
|
|
|
|
mean value: 0.009858441352844239
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.77459667 0.67419986 0.75 0.40451992 0.62994079
|
|
1. 0.5 0.77459667 0.8819171 ]
|
|
|
|
mean value: 0.727168811414926
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.875 0.8125 0.875 0.6875 0.8125 1. 0.75 0.875 0.9375]
|
|
|
|
mean value: 0.85625
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.88888889 0.76923077 0.875 0.61538462 0.8
|
|
1. 0.75 0.85714286 0.93333333]
|
|
|
|
mean value: 0.8422313797313797
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.8 1. 0.875 0.8 0.85714286
|
|
1. 0.75 1. 1. ]
|
|
|
|
mean value: 0.9082142857142858
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 1. 0.625 0.875 0.5 0.75 1. 0.75 0.75 0.875]
|
|
|
|
mean value: 0.8
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.875 0.8125 0.875 0.6875 0.8125 1. 0.75 0.875 0.9375]
|
|
|
|
mean value: 0.85625
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.8 0.625 0.77777778 0.44444444 0.66666667
|
|
1. 0.6 0.75 0.875 ]
|
|
|
|
mean value: 0.7413888888888889
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.64
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01722026 0.01831508 0.01930213 0.01839018 0.0184226 0.01807404
|
|
0.01835561 0.01866198 0.0186429 0.01855826]
|
|
|
|
mean value: 0.01839430332183838
|
|
|
|
key: score_time
|
|
value: [0.01260185 0.0122304 0.01495647 0.01476097 0.01510453 0.01543903
|
|
0.01523733 0.01575661 0.01501632 0.01562715]
|
|
|
|
mean value: 0.014673066139221192
|
|
|
|
key: test_mcc
|
|
value: [ 0.12598816 0. 0.25 -0.16012815 -0.25819889 0.25819889
|
|
0.16012815 0.5 0.25 -0.37796447]
|
|
|
|
mean value: 0.07480236846605152
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.5625 0.5 0.625 0.4375 0.375 0.625 0.5625 0.75 0.625 0.3125]
|
|
|
|
mean value: 0.5375
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.58823529 0.42857143 0.625 0.18181818 0.28571429 0.66666667
|
|
0.36363636 0.75 0.625 0.26666667]
|
|
|
|
mean value: 0.478130888719124
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.55555556 0.5 0.625 0.33333333 0.33333333 0.6
|
|
0.66666667 0.75 0.625 0.28571429]
|
|
|
|
mean value: 0.5274603174603174
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.625 0.375 0.625 0.125 0.25 0.75 0.25 0.75 0.625 0.25 ]
|
|
|
|
mean value: 0.4625
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5625 0.5 0.625 0.4375 0.375 0.625 0.5625 0.75 0.625 0.3125]
|
|
|
|
mean value: 0.5375
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.41666667 0.27272727 0.45454545 0.1 0.16666667 0.5
|
|
0.22222222 0.6 0.45454545 0.15384615]
|
|
|
|
mean value: 0.3341219891219891
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.57
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02254725 0.01390481 0.01392627 0.01405835 0.01374555 0.03056836
|
|
0.02901173 0.03391171 0.03406215 0.03388691]
|
|
|
|
mean value: 0.02396230697631836
|
|
|
|
key: score_time
|
|
value: [0.02385044 0.01221228 0.01202631 0.01211524 0.01238918 0.0221839
|
|
0.02213025 0.02046323 0.0226171 0.02082515]
|
|
|
|
mean value: 0.018081307411193848
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.13483997 0.51639778 0.67419986 0.25819889 0.37796447
|
|
1. 0.40451992 1. 0.62994079]
|
|
|
|
mean value: 0.5877978786721455
|
|
|
|
key: train_mcc
|
|
value: [0.87576054 0.91702052 0.94480902 0.88923202 0.91702052 0.90286486
|
|
0.86144352 0.93064532 0.87508441 0.88923202]
|
|
|
|
mean value: 0.9003112770766342
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.5625 0.75 0.8125 0.625 0.6875 1. 0.6875 1. 0.8125]
|
|
|
|
mean value: 0.7875
|
|
|
|
key: train_accuracy
|
|
value: [0.9375 0.95833333 0.97222222 0.94444444 0.95833333 0.95138889
|
|
0.93055556 0.96527778 0.9375 0.94444444]
|
|
|
|
mean value: 0.95
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.46153846 0.71428571 0.76923077 0.57142857 0.70588235
|
|
1. 0.73684211 1. 0.82352941]
|
|
|
|
mean value: 0.771607071978589
|
|
|
|
key: train_fscore
|
|
value: [0.93617021 0.95774648 0.97183099 0.94366197 0.95774648 0.95104895
|
|
0.92957746 0.96503497 0.93706294 0.94366197]
|
|
|
|
mean value: 0.9493542418025487
|
|
|
|
key: test_precision
|
|
value: [1. 0.6 0.83333333 1. 0.66666667 0.66666667
|
|
1. 0.63636364 1. 0.77777778]
|
|
|
|
mean value: 0.8180808080808081
|
|
|
|
key: train_precision
|
|
value: [0.95652174 0.97142857 0.98571429 0.95714286 0.97142857 0.95774648
|
|
0.94285714 0.97183099 0.94366197 0.95714286]
|
|
|
|
mean value: 0.9615475461464439
|
|
|
|
key: test_recall
|
|
value: [0.875 0.375 0.625 0.625 0.5 0.75 1. 0.875 1. 0.875]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_recall
|
|
value: [0.91666667 0.94444444 0.95833333 0.93055556 0.94444444 0.94444444
|
|
0.91666667 0.95833333 0.93055556 0.93055556]
|
|
|
|
mean value: 0.9375
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.5625 0.75 0.8125 0.625 0.6875 1. 0.6875 1. 0.8125]
|
|
|
|
mean value: 0.7875
|
|
|
|
key: train_roc_auc /home/tanu/git/LSHTM_analysis/scripts/ml/./embb_cd_7030.py:176: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_cd_7030.py:179: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
|
|
value: [0.9375 0.95833333 0.97222222 0.94444444 0.95833333 0.95138889
|
|
0.93055556 0.96527778 0.9375 0.94444444]
|
|
|
|
mean value: 0.95
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.3 0.55555556 0.625 0.4 0.54545455
|
|
1. 0.58333333 1. 0.7 ]
|
|
|
|
mean value: 0.6584343434343435
|
|
|
|
key: train_jcc
|
|
value: [0.88 0.91891892 0.94520548 0.89333333 0.91891892 0.90666667
|
|
0.86842105 0.93243243 0.88157895 0.89333333]
|
|
|
|
mean value: 0.9038809083055659
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.25951099 0.18207598 0.20796013 0.20773888 0.22549558 0.20481324
|
|
0.20497799 0.20447183 0.20572758 0.31128216]
|
|
|
|
mean value: 0.22140543460845946
|
|
|
|
key: score_time
|
|
value: [0.01242137 0.02315378 0.02255774 0.02248621 0.02380848 0.01925349
|
|
0.01931071 0.01924753 0.01924086 0.02288103]
|
|
|
|
mean value: 0.02043612003326416
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.37796447 0.37796447 0.5 0.25819889 0.37796447
|
|
0.8819171 0.40451992 1. 0.62994079]
|
|
|
|
mean value: 0.5690387221977894
|
|
|
|
key: train_mcc
|
|
value: [0.87576054 0.8201567 0.79235477 0.75116009 0.81057464 0.90286486
|
|
0.70840166 0.93064532 0.71004752 0.73675093]
|
|
|
|
mean value: 0.8038717035752984
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.6875 0.6875 0.75 0.625 0.6875 0.9375 0.6875 1. 0.8125]
|
|
|
|
mean value: 0.78125
|
|
|
|
key: train_accuracy
|
|
value: [0.9375 0.90972222 0.89583333 0.875 0.90277778 0.95138889
|
|
0.85416667 0.96527778 0.85416667 0.86805556]
|
|
|
|
mean value: 0.9013888888888889
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.66666667 0.66666667 0.75 0.57142857 0.70588235
|
|
0.94117647 0.73684211 1. 0.82352941]
|
|
|
|
mean value: 0.7795525578652513
|
|
|
|
key: train_fscore
|
|
value: [0.93617021 0.91156463 0.89361702 0.87142857 0.89705882 0.95104895
|
|
0.85314685 0.96503497 0.84892086 0.86524823]
|
|
|
|
mean value: 0.8993239114341353
|
|
|
|
key: test_precision
|
|
value: [1. 0.71428571 0.71428571 0.75 0.66666667 0.66666667
|
|
0.88888889 0.63636364 1. 0.77777778]
|
|
|
|
mean value: 0.7814935064935065
|
|
|
|
key: train_precision
|
|
value: [0.95652174 0.89333333 0.91304348 0.89705882 0.953125 0.95774648
|
|
0.85915493 0.97183099 0.88059701 0.88405797]
|
|
|
|
mean value: 0.9166469754560113
|
|
|
|
key: test_recall
|
|
value: [0.875 0.625 0.625 0.75 0.5 0.75 1. 0.875 1. 0.875]
|
|
|
|
mean value: 0.7875
|
|
|
|
key: train_recall
|
|
value: [0.91666667 0.93055556 0.875 0.84722222 0.84722222 0.94444444
|
|
0.84722222 0.95833333 0.81944444 0.84722222]
|
|
|
|
mean value: 0.8833333333333333
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.6875 0.6875 0.75 0.625 0.6875 0.9375 0.6875 1. 0.8125]
|
|
|
|
mean value: 0.78125
|
|
|
|
key: train_roc_auc
|
|
value: [0.9375 0.90972222 0.89583333 0.875 0.90277778 0.95138889
|
|
0.85416667 0.96527778 0.85416667 0.86805556]
|
|
|
|
mean value: 0.9013888888888889
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.5 0.5 0.6 0.4 0.54545455
|
|
0.88888889 0.58333333 1. 0.7 ]
|
|
|
|
mean value: 0.6592676767676767
|
|
|
|
key: train_jcc
|
|
value: [0.88 0.8375 0.80769231 0.7721519 0.81333333 0.90666667
|
|
0.74390244 0.93243243 0.7375 0.7625 ]
|
|
|
|
mean value: 0.8193679077883308
|
|
|
|
MCC on Blind test: 0.5
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05355716 0.0426147 0.04525471 0.04337955 0.04229355 0.04319
|
|
0.04874849 0.06639647 0.04266381 0.04316974]
|
|
|
|
mean value: 0.04712681770324707
|
|
|
|
key: score_time
|
|
value: [0.01504159 0.01252651 0.01494384 0.01514077 0.01492548 0.01555014
|
|
0.01502299 0.0126729 0.0126493 0.01543117]
|
|
|
|
mean value: 0.01439046859741211
|
|
|
|
key: test_mcc
|
|
value: [0.75755102 0.75826874 0.65956077 0.77795918 0.81836735 0.68137582
|
|
0.80073891 0.72213485 0.85732141 0.80195322]
|
|
|
|
mean value: 0.7635231270226952
|
|
|
|
key: train_mcc
|
|
value: [0.83392846 0.85687843 0.80432263 0.82243655 0.8178772 0.83603091
|
|
0.81332692 0.84928914 0.82698509 0.82713547]
|
|
|
|
mean value: 0.8288210806571223
|
|
|
|
key: test_accuracy
|
|
value: [0.87878788 0.87878788 0.82828283 0.88888889 0.90909091 0.83838384
|
|
0.8989899 0.85858586 0.92857143 0.89795918]
|
|
|
|
mean value: 0.8806328592042878
|
|
|
|
key: train_accuracy
|
|
value: [0.9167604 0.928009 0.90213723 0.91113611 0.90888639 0.91788526
|
|
0.90663667 0.92463442 0.91348315 0.91348315]
|
|
|
|
mean value: 0.9143051781448668
|
|
|
|
key: test_fscore
|
|
value: [0.87755102 0.88 0.8172043 0.88888889 0.90909091 0.8490566
|
|
0.89583333 0.86792453 0.92783505 0.90384615]
|
|
|
|
mean value: 0.8817230790264581
|
|
|
|
key: train_fscore
|
|
value: [0.91814159 0.92967033 0.9027933 0.91212458 0.90949721 0.91879867
|
|
0.90705487 0.92480359 0.9137738 0.91434928]
|
|
|
|
mean value: 0.9151007209295261
|
|
|
|
key: test_precision
|
|
value: [0.87755102 0.8627451 0.86363636 0.88 0.91836735 0.80357143
|
|
0.93478261 0.82142857 0.9375 0.85454545]
|
|
|
|
mean value: 0.8754127892263625
|
|
|
|
key: train_precision
|
|
value: [0.90413943 0.90967742 0.89777778 0.9030837 0.90243902 0.90769231
|
|
0.90200445 0.92170022 0.91071429 0.90528634]
|
|
|
|
mean value: 0.9064514970590146
|
|
|
|
key: test_recall
|
|
value: [0.87755102 0.89795918 0.7755102 0.89795918 0.9 0.9
|
|
0.86 0.92 0.91836735 0.95918367]
|
|
|
|
mean value: 0.8906530612244898
|
|
|
|
key: train_recall
|
|
value: [0.93258427 0.9505618 0.90786517 0.92134831 0.91666667 0.93018018
|
|
0.91216216 0.92792793 0.91685393 0.92359551]
|
|
|
|
mean value: 0.9239745925700982
|
|
|
|
key: test_roc_auc
|
|
value: [0.87877551 0.87897959 0.8277551 0.88897959 0.90918367 0.8377551
|
|
0.89938776 0.85795918 0.92857143 0.89795918]
|
|
|
|
mean value: 0.8805306122448979
|
|
|
|
key: train_roc_auc
|
|
value: [0.91674259 0.9279836 0.90213078 0.91112461 0.90889513 0.91789908
|
|
0.90664288 0.92463812 0.91348315 0.91348315]
|
|
|
|
mean value: 0.9143023079259034
|
|
|
|
key: test_jcc
|
|
value: [0.78181818 0.78571429 0.69090909 0.8 0.83333333 0.73770492
|
|
0.81132075 0.76666667 0.86538462 0.8245614 ]
|
|
|
|
mean value: 0.7897413250084714
|
|
|
|
key: train_jcc
|
|
value: [0.84867076 0.86858316 0.82281059 0.83844581 0.83401639 0.84979424
|
|
0.82991803 0.86012526 0.84123711 0.84221311]
|
|
|
|
mean value: 0.8435814471295331
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.22463059 1.05570817 1.11256909 1.0186007 1.19873643 1.05704951
|
|
1.126719 1.08294725 1.1935997 1.06516957]
|
|
|
|
mean value: 1.1135730028152466
|
|
|
|
key: score_time
|
|
value: [0.01515532 0.01636577 0.01565552 0.01576257 0.01560903 0.01364231
|
|
0.01548839 0.01544881 0.01260805 0.01562142]
|
|
|
|
mean value: 0.015135717391967774
|
|
|
|
key: test_mcc
|
|
value: [0.863122 0.82254789 0.88543774 0.863122 0.88156478 0.84441196
|
|
0.96036035 0.885171 0.92144268 0.90267093]
|
|
|
|
mean value: 0.8829851338336264
|
|
|
|
key: train_mcc
|
|
value: [0.94912147 0.97759151 0.97116812 0.97095201 0.94246459 0.94696837
|
|
0.9516751 0.97116999 0.96466869 0.96033651]
|
|
|
|
mean value: 0.9606116358627318
|
|
|
|
key: test_accuracy
|
|
value: [0.92929293 0.90909091 0.93939394 0.92929293 0.93939394 0.91919192
|
|
0.97979798 0.93939394 0.95918367 0.94897959]
|
|
|
|
mean value: 0.9393011750154607
|
|
|
|
key: train_accuracy
|
|
value: [0.97412823 0.98875141 0.98537683 0.98537683 0.97075366 0.97300337
|
|
0.97525309 0.98537683 0.98202247 0.97977528]
|
|
|
|
mean value: 0.9799818000278055
|
|
|
|
key: test_fscore
|
|
value: [0.93203883 0.91262136 0.94230769 0.93203883 0.94230769 0.9245283
|
|
0.98039216 0.94339623 0.96078431 0.95145631]
|
|
|
|
mean value: 0.9421871723311332
|
|
|
|
key: train_fscore
|
|
value: [0.97469747 0.98883929 0.98560354 0.98553949 0.97136564 0.97356828
|
|
0.97582418 0.98557159 0.98233996 0.98017621]
|
|
|
|
mean value: 0.9803525638482767
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.87037037 0.89090909 0.88888889 0.90740741 0.875
|
|
0.96153846 0.89285714 0.9245283 0.90740741]
|
|
|
|
mean value: 0.9007795960154451
|
|
|
|
key: train_precision
|
|
value: [0.95474138 0.98226164 0.97161572 0.97577093 0.95043103 0.95258621
|
|
0.9527897 0.97155361 0.96529284 0.96112311]
|
|
|
|
mean value: 0.9638166168995906
|
|
|
|
key: test_recall
|
|
value: [0.97959184 0.95918367 1. 0.97959184 0.98 0.98
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9878367346938776
|
|
|
|
key: train_recall
|
|
value: [0.99550562 0.99550562 1. 0.99550562 0.99324324 0.9954955
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9975255592671323
|
|
|
|
key: test_roc_auc
|
|
value: [0.92979592 0.90959184 0.94 0.92979592 0.93897959 0.91857143
|
|
0.97959184 0.93877551 0.95918367 0.94897959]
|
|
|
|
mean value: 0.9393265306122449
|
|
|
|
key: train_roc_auc
|
|
value: [0.97410416 0.9887438 0.98536036 0.98536542 0.97077892 0.97302865
|
|
0.9752809 0.98539326 0.98202247 0.97977528]
|
|
|
|
mean value: 0.9799853224010527
|
|
|
|
key: test_jcc
|
|
value: [0.87272727 0.83928571 0.89090909 0.87272727 0.89090909 0.85964912
|
|
0.96153846 0.89285714 0.9245283 0.90740741]
|
|
|
|
mean value: 0.8912538878055263
|
|
|
|
key: train_jcc
|
|
value: [0.95064378 0.97792494 0.97161572 0.97149123 0.94432548 0.94849785
|
|
0.9527897 0.97155361 0.96529284 0.96112311]
|
|
|
|
mean value: 0.9615258267980433
|
|
|
|
MCC on Blind test: 0.61
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01673555 0.01243305 0.01208019 0.01189065 0.01195526 0.0122056
|
|
0.01233721 0.01249337 0.01222706 0.01216221]
|
|
|
|
mean value: 0.012652015686035157
|
|
|
|
key: score_time
|
|
value: [0.01284385 0.00966644 0.00941014 0.00928617 0.00931907 0.0094986
|
|
0.0094707 0.0094533 0.00949907 0.00988865]
|
|
|
|
mean value: 0.009833598136901855
|
|
|
|
key: test_mcc
|
|
value: [0.57824506 0.64061678 0.55578301 0.63812011 0.55594991 0.55614541
|
|
0.69714286 0.67715621 0.68262876 0.57154761]
|
|
|
|
mean value: 0.6153335712933342
|
|
|
|
key: train_mcc
|
|
value: [0.66803577 0.63702506 0.67487913 0.64768179 0.59692104 0.63878315
|
|
0.62719788 0.66348921 0.63064577 0.63975648]
|
|
|
|
mean value: 0.6424415281429017
|
|
|
|
key: test_accuracy
|
|
value: [0.78787879 0.81818182 0.77777778 0.81818182 0.76767677 0.77777778
|
|
0.84848485 0.83838384 0.83673469 0.78571429]
|
|
|
|
mean value: 0.8056792413935271
|
|
|
|
key: train_accuracy
|
|
value: [0.83352081 0.81777278 0.83689539 0.82339708 0.79190101 0.81889764
|
|
0.81214848 0.83127109 0.81348315 0.81910112]
|
|
|
|
mean value: 0.8198388544128613
|
|
|
|
key: test_fscore
|
|
value: [0.77419355 0.80434783 0.77083333 0.80851064 0.73563218 0.7755102
|
|
0.84848485 0.84313725 0.82222222 0.78787879]
|
|
|
|
mean value: 0.7970750847582757
|
|
|
|
key: train_fscore
|
|
value: [0.82909931 0.81162791 0.83236994 0.8189158 0.7672956 0.81344148
|
|
0.80236686 0.82638889 0.80285036 0.81257276]
|
|
|
|
mean value: 0.8116928906740686
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.86046512 0.78723404 0.84444444 0.86486486 0.79166667
|
|
0.85714286 0.82692308 0.90243902 0.78 ]
|
|
|
|
mean value: 0.8333361911446233
|
|
|
|
key: train_precision
|
|
value: [0.85273159 0.84096386 0.85714286 0.84123223 0.86894587 0.83770883
|
|
0.84538653 0.85 0.85138539 0.84299517]
|
|
|
|
mean value: 0.8488492324172594
|
|
|
|
key: test_recall
|
|
value: [0.73469388 0.75510204 0.75510204 0.7755102 0.64 0.76
|
|
0.84 0.86 0.75510204 0.79591837]
|
|
|
|
mean value: 0.7671428571428571
|
|
|
|
key: train_recall
|
|
value: [0.80674157 0.78426966 0.80898876 0.79775281 0.68693694 0.79054054
|
|
0.76351351 0.80405405 0.75955056 0.78426966]
|
|
|
|
mean value: 0.778661807875291
|
|
|
|
key: test_roc_auc
|
|
value: [0.78734694 0.81755102 0.77755102 0.8177551 0.76897959 0.77795918
|
|
0.84857143 0.83816327 0.83673469 0.78571429]
|
|
|
|
mean value: 0.8056326530612244
|
|
|
|
key: train_roc_auc
|
|
value: [0.83355097 0.81781051 0.83692681 0.82342595 0.79178308 0.81886578
|
|
0.81209384 0.83124051 0.81348315 0.81910112]
|
|
|
|
mean value: 0.8198281708674967
|
|
|
|
key: test_jcc
|
|
value: [0.63157895 0.67272727 0.62711864 0.67857143 0.58181818 0.63333333
|
|
0.73684211 0.72881356 0.69811321 0.65 ]
|
|
|
|
mean value: 0.6638916680018796
|
|
|
|
key: train_jcc
|
|
value: [0.70808679 0.68297456 0.71287129 0.69335938 0.62244898 0.68554688
|
|
0.66996047 0.70414201 0.67063492 0.68431373]
|
|
|
|
mean value: 0.6834338993685036
|
|
|
|
MCC on Blind test: 0.48
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01247883 0.01675415 0.01677966 0.016675 0.01677823 0.01672053
|
|
0.01712465 0.01683545 0.01670694 0.01662993]
|
|
|
|
mean value: 0.01634833812713623
|
|
|
|
key: score_time
|
|
value: [0.01236534 0.01252604 0.01248479 0.01251769 0.01245928 0.01246166
|
|
0.01251173 0.01245975 0.01247382 0.01249337]
|
|
|
|
mean value: 0.012475347518920899
|
|
|
|
key: test_mcc
|
|
value: [0.61702314 0.35410133 0.57574525 0.55578301 0.53600816 0.5355102
|
|
0.57574525 0.53600816 0.49071649 0.39069516]
|
|
|
|
mean value: 0.5167336156862262
|
|
|
|
key: train_mcc
|
|
value: [0.51879992 0.54796119 0.5549901 0.5413091 0.54798635 0.53665365
|
|
0.53880726 0.552346 0.57200244 0.55281038]
|
|
|
|
mean value: 0.5463666402457225
|
|
|
|
key: test_accuracy
|
|
value: [0.80808081 0.67676768 0.78787879 0.77777778 0.76767677 0.76767677
|
|
0.78787879 0.76767677 0.74489796 0.69387755]
|
|
|
|
mean value: 0.7580189651618223
|
|
|
|
key: train_accuracy
|
|
value: [0.75928009 0.77390326 0.77727784 0.77052868 0.77390326 0.76827897
|
|
0.76940382 0.77615298 0.78539326 0.77640449]
|
|
|
|
mean value: 0.7730526661695378
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.65957447 0.78350515 0.77083333 0.77669903 0.76767677
|
|
0.79207921 0.77669903 0.75247525 0.71153846]
|
|
|
|
mean value: 0.7591080698970816
|
|
|
|
key: train_fscore
|
|
value: [0.76327434 0.77691454 0.78193833 0.77433628 0.77641824 0.77008929
|
|
0.76888388 0.77715566 0.7921654 0.77665544]
|
|
|
|
mean value: 0.775783138689622
|
|
|
|
key: test_precision
|
|
value: [0.82608696 0.68888889 0.79166667 0.78723404 0.75471698 0.7755102
|
|
0.78431373 0.75471698 0.73076923 0.67272727]
|
|
|
|
mean value: 0.7566630949962969
|
|
|
|
key: train_precision
|
|
value: [0.75163399 0.76754386 0.76673866 0.76252723 0.76703297 0.76327434
|
|
0.76975169 0.77282851 0.76793249 0.77578475]
|
|
|
|
mean value: 0.7665048487528039
|
|
|
|
key: test_recall
|
|
value: [0.7755102 0.63265306 0.7755102 0.75510204 0.8 0.76
|
|
0.8 0.8 0.7755102 0.75510204]
|
|
|
|
mean value: 0.7629387755102041
|
|
|
|
key: train_recall
|
|
value: [0.7752809 0.78651685 0.79775281 0.78651685 0.78603604 0.77702703
|
|
0.76801802 0.78153153 0.81797753 0.77752809]
|
|
|
|
mean value: 0.7854185646320477
|
|
|
|
key: test_roc_auc
|
|
value: [0.8077551 0.67632653 0.7877551 0.77755102 0.76734694 0.7677551
|
|
0.7877551 0.76734694 0.74489796 0.69387755]
|
|
|
|
mean value: 0.7578367346938776
|
|
|
|
key: train_roc_auc
|
|
value: [0.75926207 0.77388906 0.77725478 0.77051068 0.77391689 0.76828879
|
|
0.76940227 0.77615902 0.78539326 0.77640449]
|
|
|
|
mean value: 0.7730481324020649
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.49206349 0.6440678 0.62711864 0.63492063 0.62295082
|
|
0.6557377 0.63492063 0.6031746 0.55223881]
|
|
|
|
mean value: 0.613385980298431
|
|
|
|
key: train_jcc
|
|
value: [0.61717352 0.63520871 0.64195298 0.63176895 0.63454545 0.6261343
|
|
0.62454212 0.63553114 0.65585586 0.63486239]
|
|
|
|
mean value: 0.6337575429443841
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.0157733 0.01145649 0.01214123 0.01273537 0.01268053 0.01320958
|
|
0.01308846 0.01329494 0.01179957 0.01297712]
|
|
|
|
mean value: 0.012915658950805663
|
|
|
|
key: score_time
|
|
value: [0.04137635 0.01387143 0.01484156 0.0155642 0.01554847 0.01563334
|
|
0.01554394 0.01566815 0.01872492 0.01584148]
|
|
|
|
mean value: 0.01826138496398926
|
|
|
|
key: test_mcc
|
|
value: [0.6862556 0.77394006 0.83232122 0.80412203 0.87944488 0.65507603
|
|
0.68524605 0.66706065 0.80674308 0.76200076]
|
|
|
|
mean value: 0.7552210371121403
|
|
|
|
key: train_mcc
|
|
value: [0.86692101 0.85450918 0.85790797 0.85336191 0.84288854 0.84679753
|
|
0.82469214 0.84383391 0.8430316 0.87210157]
|
|
|
|
mean value: 0.8506045353237754
|
|
|
|
key: test_accuracy
|
|
value: [0.83838384 0.87878788 0.90909091 0.8989899 0.93939394 0.81818182
|
|
0.83838384 0.82828283 0.89795918 0.86734694]
|
|
|
|
mean value: 0.871480107194393
|
|
|
|
key: train_accuracy
|
|
value: [0.93025872 0.9223847 0.92463442 0.9223847 0.9167604 0.92013498
|
|
0.90776153 0.91901012 0.91685393 0.93258427]
|
|
|
|
mean value: 0.9212767786049215
|
|
|
|
key: test_fscore
|
|
value: [0.8490566 0.88888889 0.91588785 0.90384615 0.94117647 0.83928571
|
|
0.85185185 0.8440367 0.90566038 0.88288288]
|
|
|
|
mean value: 0.8822573491190798
|
|
|
|
key: train_fscore
|
|
value: [0.93432203 0.92789969 0.9296957 0.92759706 0.92243187 0.92470838
|
|
0.91404612 0.92340426 0.92259414 0.93657505]
|
|
|
|
mean value: 0.9263274295497499
|
|
|
|
key: test_precision
|
|
value: [0.78947368 0.81355932 0.84482759 0.85454545 0.92307692 0.75806452
|
|
0.79310345 0.77966102 0.84210526 0.79032258]
|
|
|
|
mean value: 0.8188739795230802
|
|
|
|
key: train_precision
|
|
value: [0.88376754 0.8671875 0.87204724 0.87007874 0.8627451 0.87374749
|
|
0.85490196 0.875 0.8630137 0.88423154]
|
|
|
|
mean value: 0.8706720808691903
|
|
|
|
key: test_recall
|
|
value: [0.91836735 0.97959184 1. 0.95918367 0.96 0.94
|
|
0.92 0.92 0.97959184 1. ]
|
|
|
|
mean value: 0.9576734693877551
|
|
|
|
key: train_recall
|
|
value: [0.99101124 0.99775281 0.99550562 0.99325843 0.99099099 0.98198198
|
|
0.98198198 0.97747748 0.99101124 0.99550562]
|
|
|
|
mean value: 0.9896477376252657
|
|
|
|
key: test_roc_auc
|
|
value: [0.83918367 0.87979592 0.91 0.89959184 0.93918367 0.81693878
|
|
0.83755102 0.82734694 0.89795918 0.86734694]
|
|
|
|
mean value: 0.8714897959183673
|
|
|
|
key: train_roc_auc
|
|
value: [0.9301903 0.92229983 0.92455461 0.92230489 0.91684381 0.92020447
|
|
0.90784492 0.91907582 0.91685393 0.93258427]
|
|
|
|
mean value: 0.9212756857981578
|
|
|
|
key: test_jcc
|
|
value: [0.73770492 0.8 0.84482759 0.8245614 0.88888889 0.72307692
|
|
0.74193548 0.73015873 0.82758621 0.79032258]
|
|
|
|
mean value: 0.7909062721285678
|
|
|
|
key: train_jcc
|
|
value: [0.87673956 0.86549708 0.86862745 0.86497065 0.85603113 0.85996055
|
|
0.84169884 0.85770751 0.85631068 0.88071571]
|
|
|
|
mean value: 0.8628259153050839
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05458593 0.05406904 0.05278516 0.05480218 0.0511229 0.05297947
|
|
0.05459261 0.05324531 0.04908705 0.05352235]
|
|
|
|
mean value: 0.05307919979095459
|
|
|
|
key: score_time
|
|
value: [0.02032709 0.01914358 0.02031136 0.01740026 0.01839948 0.02088666
|
|
0.02111769 0.02110386 0.01895499 0.01804757]
|
|
|
|
mean value: 0.019569253921508788
|
|
|
|
key: test_mcc
|
|
value: [0.71784408 0.71784408 0.81956057 0.79795918 0.79869341 0.73744392
|
|
0.87877551 0.70021862 0.88048967 0.81786082]
|
|
|
|
mean value: 0.7866689859957828
|
|
|
|
key: train_mcc
|
|
value: [0.83144516 0.84027229 0.84027229 0.8335206 0.82232177 0.829152
|
|
0.82452171 0.84028769 0.81797959 0.8204319 ]
|
|
|
|
mean value: 0.8300205014140227
|
|
|
|
key: test_accuracy
|
|
value: [0.85858586 0.85858586 0.90909091 0.8989899 0.8989899 0.86868687
|
|
0.93939394 0.84848485 0.93877551 0.90816327]
|
|
|
|
mean value: 0.8927746856318285
|
|
|
|
key: train_accuracy
|
|
value: [0.91563555 0.92013498 0.92013498 0.9167604 0.91113611 0.91451069
|
|
0.91226097 0.92013498 0.90898876 0.91011236]
|
|
|
|
mean value: 0.9149809785012828
|
|
|
|
key: test_fscore
|
|
value: [0.86 0.86 0.90526316 0.89795918 0.89795918 0.87128713
|
|
0.94 0.85714286 0.94117647 0.91089109]
|
|
|
|
mean value: 0.894167907079455
|
|
|
|
key: train_fscore
|
|
value: [0.91486947 0.92013498 0.92013498 0.91685393 0.91053228 0.91363636
|
|
0.91216216 0.91977401 0.90888639 0.90909091]
|
|
|
|
mean value: 0.9146075477074722
|
|
|
|
key: test_precision
|
|
value: [0.84313725 0.84313725 0.93478261 0.89795918 0.91666667 0.8627451
|
|
0.94 0.81818182 0.90566038 0.88461538]
|
|
|
|
mean value: 0.8846885647034619
|
|
|
|
key: train_precision
|
|
value: [0.92431193 0.92117117 0.92117117 0.91685393 0.91571754 0.92201835
|
|
0.91216216 0.92290249 0.90990991 0.91954023]
|
|
|
|
mean value: 0.9185758886307491
|
|
|
|
key: test_recall
|
|
value: [0.87755102 0.87755102 0.87755102 0.89795918 0.88 0.88
|
|
0.94 0.9 0.97959184 0.93877551]
|
|
|
|
mean value: 0.9048979591836734
|
|
|
|
key: train_recall
|
|
value: [0.90561798 0.91910112 0.91910112 0.91685393 0.90540541 0.90540541
|
|
0.91216216 0.91666667 0.90786517 0.8988764 ]
|
|
|
|
mean value: 0.9107055369976719
|
|
|
|
key: test_roc_auc
|
|
value: [0.85877551 0.85877551 0.90877551 0.89897959 0.89918367 0.86857143
|
|
0.93938776 0.84795918 0.93877551 0.90816327]
|
|
|
|
mean value: 0.892734693877551
|
|
|
|
key: train_roc_auc
|
|
value: [0.91564683 0.92013615 0.92013615 0.9167603 0.91112967 0.91450046
|
|
0.91226086 0.92013109 0.90898876 0.91011236]
|
|
|
|
mean value: 0.9149802611600364
|
|
|
|
key: test_jcc
|
|
value: [0.75438596 0.75438596 0.82692308 0.81481481 0.81481481 0.77192982
|
|
0.88679245 0.75 0.88888889 0.83636364]
|
|
|
|
mean value: 0.8099299439021386
|
|
|
|
key: train_jcc
|
|
value: [0.84309623 0.85208333 0.85208333 0.84647303 0.83575884 0.84100418
|
|
0.83850932 0.85146444 0.83298969 0.83333333]
|
|
|
|
mean value: 0.84267957258528
|
|
|
|
MCC on Blind test: 0.52
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [3.71795654 3.27859187 3.56246805 3.77698517 3.33423853 2.46210384
|
|
3.60218835 3.56319189 2.56194735 4.78781986]
|
|
|
|
mean value: 3.4647491455078123
|
|
|
|
key: score_time
|
|
value: [0.03088045 0.01269913 0.03797531 0.01509118 0.01293278 0.01283407
|
|
0.01515484 0.02330518 0.01723456 0.0213201 ]
|
|
|
|
mean value: 0.019942760467529297
|
|
|
|
key: test_mcc
|
|
value: [0.96039208 0.90369611 0.94115314 0.96039208 0.96036035 0.92213889
|
|
0.97999192 0.92213889 0.94053994 0.8660254 ]
|
|
|
|
mean value: 0.9356828798247384
|
|
|
|
key: train_mcc
|
|
value: [1. 0.9977528 1. 0.9977528 0.99775281 0.99775281
|
|
0.99775281 1. 0.993281 0.99775533]
|
|
|
|
mean value: 0.9979800349421086
|
|
|
|
key: test_accuracy
|
|
value: [0.97979798 0.94949495 0.96969697 0.97979798 0.97979798 0.95959596
|
|
0.98989899 0.95959596 0.96938776 0.92857143]
|
|
|
|
mean value: 0.9665635951350237
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.99887514 1. 0.99887514 0.99887514 0.99887514
|
|
0.99887514 1. 0.99662921 0.9988764 ]
|
|
|
|
mean value: 0.9989881321014649
|
|
|
|
key: test_fscore
|
|
value: [0.98 0.95145631 0.97029703 0.98 0.98039216 0.96153846
|
|
0.99009901 0.96153846 0.97029703 0.93333333]
|
|
|
|
mean value: 0.9678951793259544
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99887767 1. 0.99887767 0.99887514 0.99887514
|
|
0.99887514 1. 0.99664054 0.99887767]
|
|
|
|
mean value: 0.9989898955969266
|
|
|
|
key: test_precision
|
|
value: [0.96078431 0.90740741 0.94230769 0.96078431 0.96153846 0.92592593
|
|
0.98039216 0.92592593 0.94230769 0.875 ]
|
|
|
|
mean value: 0.938237388972683
|
|
|
|
key: train_precision
|
|
value: [1. 0.99775785 1. 0.99775785 0.99775281 0.99775281
|
|
0.99775281 1. 0.99330357 0.99775785]
|
|
|
|
mean value: 0.9979835540995761
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 0.95 0.97 0.98 0.97959184 0.95918367
|
|
0.98979592 0.95918367 0.96938776 0.92857143]
|
|
|
|
mean value: 0.9665714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99887387 1. 0.99887387 0.9988764 0.9988764
|
|
0.9988764 1. 0.99662921 0.9988764 ]
|
|
|
|
mean value: 0.9989882579208422
|
|
|
|
key: test_jcc
|
|
value: [0.96078431 0.90740741 0.94230769 0.96078431 0.96153846 0.92592593
|
|
0.98039216 0.92592593 0.94230769 0.875 ]
|
|
|
|
mean value: 0.938237388972683
|
|
|
|
key: train_jcc
|
|
value: [1. 0.99775785 1. 0.99775785 0.99775281 0.99775281
|
|
0.99775281 1. 0.99330357 0.99775785]
|
|
|
|
mean value: 0.9979835540995761
|
|
|
|
MCC on Blind test: 0.49
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.05787063 0.05731821 0.04956865 0.05259037 0.0504818 0.05536056
|
|
0.03977418 0.04540706 0.0632062 0.05131841]
|
|
|
|
mean value: 0.052289605140686035
|
|
|
|
key: score_time
|
|
value: [0.01011372 0.01066351 0.01463532 0.00981522 0.01499319 0.00951672
|
|
0.00926733 0.01265788 0.0130794 0.01543713]
|
|
|
|
mean value: 0.012017941474914551
|
|
|
|
key: test_mcc
|
|
value: [0.92226137 0.92226137 0.96039208 0.96039208 0.92213889 0.92213889
|
|
0.92213889 0.97999192 0.92144268 0.95998366]
|
|
|
|
mean value: 0.9393141823207555
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.95959596 0.95959596 0.97979798 0.97979798 0.95959596 0.95959596
|
|
0.95959596 0.98989899 0.95918367 0.97959184]
|
|
|
|
mean value: 0.9686250257678829
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96078431 0.96078431 0.98 0.98 0.96153846 0.96153846
|
|
0.96153846 0.99009901 0.96078431 0.98 ]
|
|
|
|
mean value: 0.9697067335692845
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.9245283 0.9245283 0.96078431 0.96078431 0.92592593 0.92592593
|
|
0.92592593 0.98039216 0.9245283 0.96078431]
|
|
|
|
mean value: 0.9414107781477371
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 0.96 0.98 0.98 0.95918367 0.95918367
|
|
0.95918367 0.98979592 0.95918367 0.97959184]
|
|
|
|
mean value: 0.9686122448979593
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.9245283 0.9245283 0.96078431 0.96078431 0.92592593 0.92592593
|
|
0.92592593 0.98039216 0.9245283 0.96078431]
|
|
|
|
mean value: 0.9414107781477371
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.17448545 0.19207025 0.1727097 0.18104172 0.19200134 0.19790864
|
|
0.17134237 0.18381476 0.1848731 0.17243171]
|
|
|
|
mean value: 0.1822679042816162
|
|
|
|
key: score_time
|
|
value: [0.01846647 0.02434683 0.0196991 0.02066755 0.02106452 0.02380848
|
|
0.01863456 0.02005315 0.02629566 0.01876903]
|
|
|
|
mean value: 0.02118053436279297
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 1. 1. 1. 0.97999192
|
|
0.97999192 0.94108303 0.9797959 1. ]
|
|
|
|
mean value: 0.9880862759663566
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 1. 1. 1. 0.98989899
|
|
0.98989899 0.96969697 0.98979592 1. ]
|
|
|
|
mean value: 0.9939290867862296
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 1. 1. 1. 0.99009901
|
|
0.99009901 0.97087379 0.98989899 1. ]
|
|
|
|
mean value: 0.9940970796108737
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 0.98039216
|
|
0.98039216 0.94339623 0.98 1. ]
|
|
|
|
mean value: 0.9884180540140585
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 1. 1. 1. 0.98979592
|
|
0.98979592 0.96938776 0.98979592 1. ]
|
|
|
|
mean value: 0.9938775510204082
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 1. 1. 1. 0.98039216
|
|
0.98039216 0.94339623 0.98 1. ]
|
|
|
|
mean value: 0.9884180540140585
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.87
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01277018 0.01284671 0.02041531 0.01383948 0.01337767 0.01296997
|
|
0.02047944 0.0129292 0.0132153 0.01236629]
|
|
|
|
mean value: 0.014520955085754395
|
|
|
|
key: score_time
|
|
value: [0.00934219 0.00950646 0.01441789 0.00991201 0.00925684 0.00997806
|
|
0.01434565 0.00993395 0.01181316 0.00959873]
|
|
|
|
mean value: 0.010810494422912598
|
|
|
|
key: test_mcc
|
|
value: [0.83232122 0.92226137 0.86746758 0.88543774 0.9035079 0.81441102
|
|
0.94108303 0.885171 0.8660254 0.88420483]
|
|
|
|
mean value: 0.880189110548122
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.90909091 0.95959596 0.92929293 0.93939394 0.94949495 0.8989899
|
|
0.96969697 0.93939394 0.92857143 0.93877551]
|
|
|
|
mean value: 0.9362296433725006
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.91588785 0.96078431 0.93333333 0.94230769 0.95238095 0.90909091
|
|
0.97087379 0.94339623 0.93333333 0.94230769]
|
|
|
|
mean value: 0.9403696089769554
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.84482759 0.9245283 0.875 0.89090909 0.90909091 0.83333333
|
|
0.94339623 0.89285714 0.875 0.89090909]
|
|
|
|
mean value: 0.8879851681608351
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.91 0.96 0.93 0.94 0.94897959 0.89795918
|
|
0.96938776 0.93877551 0.92857143 0.93877551]
|
|
|
|
mean value: 0.9362448979591836
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.84482759 0.9245283 0.875 0.89090909 0.90909091 0.83333333
|
|
0.94339623 0.89285714 0.875 0.89090909]
|
|
|
|
mean value: 0.8879851681608351
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.62646651 2.31779599 2.29431295 2.30238032 2.36595178 2.29143786
|
|
2.33263278 2.30914521 2.32489729 2.39071798]
|
|
|
|
mean value: 2.355573868751526
|
|
|
|
key: score_time
|
|
value: [0.09789824 0.09828162 0.09867167 0.10346723 0.09860706 0.10229325
|
|
0.10256696 0.09777188 0.10007524 0.09858131]
|
|
|
|
mean value: 0.09982144832611084
|
|
|
|
key: test_mcc
|
|
value: [0.96039208 1. 0.98 0.96039208 0.97999192 1.
|
|
0.97999192 0.96036035 0.9797959 0.9797959 ]
|
|
|
|
mean value: 0.9780720123789145
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.97979798 1. 0.98989899 0.97979798 0.98989899 1.
|
|
0.98989899 0.97979798 0.98979592 0.98979592]
|
|
|
|
mean value: 0.9888682745825603
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.98 1. 0.98989899 0.98 0.99009901 1.
|
|
0.99009901 0.98039216 0.98989899 0.98989899]
|
|
|
|
mean value: 0.9890287146361695
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.96078431 1. 0.98 0.96078431 0.98039216 1.
|
|
0.98039216 0.96153846 0.98 0.98 ]
|
|
|
|
mean value: 0.9783891402714933
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.98 1. 0.99 0.98 0.98979592 1.
|
|
0.98979592 0.97959184 0.98979592 0.98979592]
|
|
|
|
mean value: 0.9888775510204082
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.96078431 1. 0.98 0.96078431 0.98039216 1.
|
|
0.98039216 0.96153846 0.98 0.98 ]
|
|
|
|
mean value: 0.9783891402714933
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.07864785 1.05706811 1.1116693 1.12475038 1.08170772 1.13533378
|
|
1.06463242 1.17062831 1.10141206 1.09471107]
|
|
|
|
mean value: 1.102056097984314
|
|
|
|
key: score_time
|
|
value: [0.21096325 0.24873376 0.24595237 0.28230929 0.28896403 0.28917456
|
|
0.27740908 0.28838801 0.13431239 0.27159071]
|
|
|
|
mean value: 0.2537797451019287
|
|
|
|
key: test_mcc
|
|
value: [0.92226137 0.96036035 0.96039208 0.96039208 0.97999192 0.97999192
|
|
0.97999192 0.94108303 0.9797959 0.94053994]
|
|
|
|
mean value: 0.9604800488955269
|
|
|
|
key: train_mcc
|
|
value: [0.99104115 0.98204181 0.98659135 0.98881381 0.98881409 0.98881409
|
|
0.98881409 0.99327351 0.98882646 0.98882646]
|
|
|
|
mean value: 0.9885856840386122
|
|
|
|
key: test_accuracy
|
|
value: [0.95959596 0.97979798 0.97979798 0.97979798 0.98989899 0.98989899
|
|
0.98989899 0.96969697 0.98979592 0.96938776]
|
|
|
|
mean value: 0.9797567511853226
|
|
|
|
key: train_accuracy
|
|
value: [0.99550056 0.99100112 0.99325084 0.9943757 0.9943757 0.9943757
|
|
0.9943757 0.99662542 0.99438202 0.99438202]
|
|
|
|
mean value: 0.9942644809848207
|
|
|
|
key: test_fscore
|
|
value: [0.96078431 0.97916667 0.98 0.98 0.99009901 0.99009901
|
|
0.99009901 0.97087379 0.98989899 0.97029703]
|
|
|
|
mean value: 0.9801317816104854
|
|
|
|
key: train_fscore
|
|
value: [0.99552573 0.99105145 0.99330357 0.99441341 0.9944009 0.9944009
|
|
0.9944009 0.996633 0.99441341 0.99441341]
|
|
|
|
mean value: 0.9942956660303298
|
|
|
|
key: test_precision
|
|
value: [0.9245283 1. 0.96078431 0.96078431 0.98039216 0.98039216
|
|
0.98039216 0.94339623 0.98 0.94230769]
|
|
|
|
mean value: 0.9652977318648794
|
|
|
|
key: train_precision
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.99109131 0.98663697 0.98669623 0.98888889 0.98886414 0.98886414
|
|
0.98886414 0.99328859 0.98888889 0.98888889]
|
|
|
|
mean value: 0.9890972200564241
|
|
|
|
key: test_recall
|
|
value: [1. 0.95918367 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9959183673469387
|
|
|
|
key: train_recall
|
|
value: [1. 0.99550562 1. 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9995505617977528
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 0.97959184 0.98 0.98 0.98979592 0.98979592
|
|
0.98979592 0.96938776 0.98979592 0.96938776]
|
|
|
|
mean value: 0.9797551020408164
|
|
|
|
key: train_roc_auc
|
|
value: [0.9954955 0.99099605 0.99324324 0.99436937 0.99438202 0.99438202
|
|
0.99438202 0.99662921 0.99438202 0.99438202]
|
|
|
|
mean value: 0.9942643486182812
|
|
|
|
key: test_jcc
|
|
value: [0.9245283 0.95918367 0.96078431 0.96078431 0.98039216 0.98039216
|
|
0.98039216 0.94339623 0.98 0.94230769]
|
|
|
|
mean value: 0.9612160992118183
|
|
|
|
key: train_jcc
|
|
value: [0.99109131 0.98226164 0.98669623 0.98888889 0.98886414 0.98886414
|
|
0.98886414 0.99328859 0.98888889 0.98888889]
|
|
|
|
mean value: 0.9886596870315696
|
|
|
|
MCC on Blind test: 0.62
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02818155 0.01640964 0.0166142 0.01636291 0.01646543 0.01653767
|
|
0.01663399 0.01648474 0.01645207 0.0167706 ]
|
|
|
|
mean value: 0.017691278457641603
|
|
|
|
key: score_time
|
|
value: [0.01294327 0.01235938 0.01253033 0.0123024 0.01231956 0.012398
|
|
0.0123713 0.01232553 0.01236725 0.01237607]
|
|
|
|
mean value: 0.012429308891296387
|
|
|
|
key: test_mcc
|
|
value: [0.61702314 0.35410133 0.57574525 0.55578301 0.53600816 0.5355102
|
|
0.57574525 0.53600816 0.49071649 0.39069516]
|
|
|
|
mean value: 0.5167336156862262
|
|
|
|
key: train_mcc
|
|
value: [0.51879992 0.54796119 0.5549901 0.5413091 0.54798635 0.53665365
|
|
0.53880726 0.552346 0.57200244 0.55281038]
|
|
|
|
mean value: 0.5463666402457225
|
|
|
|
key: test_accuracy
|
|
value: [0.80808081 0.67676768 0.78787879 0.77777778 0.76767677 0.76767677
|
|
0.78787879 0.76767677 0.74489796 0.69387755]
|
|
|
|
mean value: 0.7580189651618223
|
|
|
|
key: train_accuracy
|
|
value: [0.75928009 0.77390326 0.77727784 0.77052868 0.77390326 0.76827897
|
|
0.76940382 0.77615298 0.78539326 0.77640449]
|
|
|
|
mean value: 0.7730526661695378
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.65957447 0.78350515 0.77083333 0.77669903 0.76767677
|
|
0.79207921 0.77669903 0.75247525 0.71153846]
|
|
|
|
mean value: 0.7591080698970816
|
|
|
|
key: train_fscore
|
|
value: [0.76327434 0.77691454 0.78193833 0.77433628 0.77641824 0.77008929
|
|
0.76888388 0.77715566 0.7921654 0.77665544]
|
|
|
|
mean value: 0.775783138689622
|
|
|
|
key: test_precision
|
|
value: [0.82608696 0.68888889 0.79166667 0.78723404 0.75471698 0.7755102
|
|
0.78431373 0.75471698 0.73076923 0.67272727]
|
|
|
|
mean value: 0.7566630949962969
|
|
|
|
key: train_precision
|
|
value: [0.75163399 0.76754386 0.76673866 0.76252723 0.76703297 0.76327434
|
|
0.76975169 0.77282851 0.76793249 0.77578475]
|
|
|
|
mean value: 0.7665048487528039
|
|
|
|
key: test_recall
|
|
value: [0.7755102 0.63265306 0.7755102 0.75510204 0.8 0.76
|
|
0.8 0.8 0.7755102 0.75510204]
|
|
|
|
mean value: 0.7629387755102041
|
|
|
|
key: train_recall
|
|
value: [0.7752809 0.78651685 0.79775281 0.78651685 0.78603604 0.77702703
|
|
0.76801802 0.78153153 0.81797753 0.77752809]
|
|
|
|
mean value: 0.7854185646320477
|
|
|
|
key: test_roc_auc
|
|
value: [0.8077551 0.67632653 0.7877551 0.77755102 0.76734694 0.7677551
|
|
0.7877551 0.76734694 0.74489796 0.69387755]
|
|
|
|
mean value: 0.7578367346938776
|
|
|
|
key: train_roc_auc
|
|
value: [0.75926207 0.77388906 0.77725478 0.77051068 0.77391689 0.76828879
|
|
0.76940227 0.77615902 0.78539326 0.77640449]
|
|
|
|
mean value: 0.7730481324020649
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.49206349 0.6440678 0.62711864 0.63492063 0.62295082
|
|
0.6557377 0.63492063 0.6031746 0.55223881]
|
|
|
|
mean value: 0.613385980298431
|
|
|
|
key: train_jcc
|
|
value: [0.61717352 0.63520871 0.64195298 0.63176895 0.63454545 0.6261343
|
|
0.62454212 0.63553114 0.65585586 0.63486239]
|
|
|
|
mean value: 0.6337575429443841
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.13117743 0.13017297 0.10528946 0.11773849 0.13207746 0.12107038
|
|
0.19286537 0.10384011 0.12658334 0.10978913]
|
|
|
|
mean value: 0.1270604133605957
|
|
|
|
key: score_time
|
|
value: [0.01237822 0.01131606 0.01131964 0.01251054 0.01123238 0.01286054
|
|
0.01153684 0.01139259 0.01172996 0.01139951]
|
|
|
|
mean value: 0.01176762580871582
|
|
|
|
key: test_mcc
|
|
value: [0.94115314 0.96039208 0.96039208 0.96039208 0.97999192 0.97999192
|
|
0.96036035 0.885171 0.9797959 0.9797959 ]
|
|
|
|
mean value: 0.9587436338055487
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.97979798 0.97979798 0.97979798 0.98989899 0.98989899
|
|
0.97979798 0.93939394 0.98979592 0.98979592]
|
|
|
|
mean value: 0.9787672644815502
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.97029703 0.98 0.98 0.98 0.99009901 0.99009901
|
|
0.98039216 0.94339623 0.98989899 0.98989899]
|
|
|
|
mean value: 0.979408141258077
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.94230769 0.96078431 0.96078431 0.96078431 0.98039216 0.98039216
|
|
0.96153846 0.89285714 0.98 0.98 ]
|
|
|
|
mean value: 0.9599840551605258
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97 0.98 0.98 0.98 0.98979592 0.98979592
|
|
0.97959184 0.93877551 0.98979592 0.98979592]
|
|
|
|
mean value: 0.9787551020408163
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.94230769 0.96078431 0.96078431 0.96078431 0.98039216 0.98039216
|
|
0.96153846 0.89285714 0.98 0.98 ]
|
|
|
|
mean value: 0.9599840551605258
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.69
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.05922556 0.11751652 0.09628844 0.06397629 0.09017968 0.08379126
|
|
0.05699015 0.09830332 0.09346771 0.09379196]
|
|
|
|
mean value: 0.08535308837890625
|
|
|
|
key: score_time
|
|
value: [0.01922965 0.03424406 0.0131371 0.01926708 0.01913023 0.01251912
|
|
0.01680779 0.01920915 0.02351069 0.01251936]
|
|
|
|
mean value: 0.01895742416381836
|
|
|
|
key: test_mcc
|
|
value: [0.76346418 0.81977994 0.78197378 0.80412203 0.83898714 0.84441196
|
|
0.94108303 0.84930737 0.83953666 0.80195322]
|
|
|
|
mean value: 0.8284619318227261
|
|
|
|
key: train_mcc
|
|
value: [0.87979116 0.89404404 0.89095088 0.90018119 0.88453645 0.87870015
|
|
0.87629918 0.89546866 0.87307705 0.88188139]
|
|
|
|
mean value: 0.8854930153854812
|
|
|
|
key: test_accuracy
|
|
value: [0.87878788 0.90909091 0.88888889 0.8989899 0.91919192 0.91919192
|
|
0.96969697 0.91919192 0.91836735 0.89795918]
|
|
|
|
mean value: 0.9119356833642548
|
|
|
|
key: train_accuracy
|
|
value: [0.93925759 0.94600675 0.94488189 0.94938133 0.94150731 0.93813273
|
|
0.93700787 0.94713161 0.93595506 0.94044944]
|
|
|
|
mean value: 0.9419711580996195
|
|
|
|
key: test_fscore
|
|
value: [0.88461538 0.91089109 0.89320388 0.90384615 0.92156863 0.9245283
|
|
0.97087379 0.92592593 0.92156863 0.90384615]
|
|
|
|
mean value: 0.9160867934034195
|
|
|
|
key: train_fscore
|
|
value: [0.94091904 0.94782609 0.94633078 0.95081967 0.94310722 0.9402823
|
|
0.93913043 0.94840834 0.93756846 0.94182217]
|
|
|
|
mean value: 0.9436214503135185
|
|
|
|
key: test_precision
|
|
value: [0.83636364 0.88461538 0.85185185 0.85454545 0.90384615 0.875
|
|
0.94339623 0.86206897 0.88679245 0.85454545]
|
|
|
|
mean value: 0.875302558053046
|
|
|
|
key: train_precision
|
|
value: [0.91684435 0.91789474 0.92307692 0.92553191 0.91702128 0.90775681
|
|
0.90756303 0.92505353 0.91452991 0.92060086]
|
|
|
|
mean value: 0.9175873345805426
|
|
|
|
key: test_recall
|
|
value: [0.93877551 0.93877551 0.93877551 0.95918367 0.94 0.98
|
|
1. 1. 0.95918367 0.95918367]
|
|
|
|
mean value: 0.9613877551020408
|
|
|
|
key: train_recall
|
|
value: [0.96629213 0.97977528 0.97078652 0.97752809 0.97072072 0.97522523
|
|
0.97297297 0.97297297 0.96179775 0.96404494]
|
|
|
|
mean value: 0.9712116610993016
|
|
|
|
key: test_roc_auc
|
|
value: [0.87938776 0.90938776 0.88938776 0.89959184 0.91897959 0.91857143
|
|
0.96938776 0.91836735 0.91836735 0.89795918]
|
|
|
|
mean value: 0.9119387755102041
|
|
|
|
key: train_roc_auc
|
|
value: [0.93922715 0.94596872 0.94485272 0.94934963 0.94154014 0.93817441
|
|
0.93704828 0.94716064 0.93595506 0.94044944]
|
|
|
|
mean value: 0.9419726186861018
|
|
|
|
key: test_jcc
|
|
value: [0.79310345 0.83636364 0.80701754 0.8245614 0.85454545 0.85964912
|
|
0.94339623 0.86206897 0.85454545 0.8245614 ]
|
|
|
|
mean value: 0.8459812659346954
|
|
|
|
key: train_jcc
|
|
value: [0.88842975 0.90082645 0.8981289 0.90625 0.89233954 0.88729508
|
|
0.8852459 0.90187891 0.88247423 0.89004149]
|
|
|
|
mean value: 0.8932910259581088
|
|
|
|
MCC on Blind test: 0.51
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0293839 0.01589632 0.01593113 0.01555037 0.01585388 0.01575375
|
|
0.01572251 0.01554537 0.01546693 0.01594353]
|
|
|
|
mean value: 0.01710476875305176
|
|
|
|
key: score_time
|
|
value: [0.01209593 0.01213789 0.0121007 0.01235938 0.01234531 0.01209593
|
|
0.01211119 0.01222897 0.01218843 0.01219249]
|
|
|
|
mean value: 0.012185621261596679
|
|
|
|
key: test_mcc
|
|
value: [0.57824506 0.51572075 0.61890291 0.61616992 0.66025145 0.53532058
|
|
0.61632653 0.58471775 0.61339562 0.49071649]
|
|
|
|
mean value: 0.5829767050379736
|
|
|
|
key: train_mcc
|
|
value: [0.59077841 0.58865181 0.60854844 0.59954955 0.6176094 0.60413883
|
|
0.59065462 0.6041919 0.55281038 0.62030147]
|
|
|
|
mean value: 0.5977234805514439
|
|
|
|
key: test_accuracy
|
|
value: [0.78787879 0.75757576 0.80808081 0.80808081 0.82828283 0.76767677
|
|
0.80808081 0.78787879 0.80612245 0.74489796]
|
|
|
|
mean value: 0.7904555761698618
|
|
|
|
key: train_accuracy
|
|
value: [0.79527559 0.79415073 0.80427447 0.79977503 0.8087739 0.80202475
|
|
0.79527559 0.80202475 0.77640449 0.81011236]
|
|
|
|
mean value: 0.798809165708219
|
|
|
|
key: test_fscore
|
|
value: [0.77419355 0.76 0.79569892 0.80412371 0.82105263 0.77227723
|
|
0.80808081 0.80733945 0.8 0.75247525]
|
|
|
|
mean value: 0.789524154890705
|
|
|
|
key: train_fscore
|
|
value: [0.79271071 0.79085714 0.80449438 0.8 0.8098434 0.8
|
|
0.796875 0.79954442 0.77615298 0.80860702]
|
|
|
|
mean value: 0.7979085053006724
|
|
|
|
key: test_precision
|
|
value: [0.81818182 0.74509804 0.84090909 0.8125 0.86666667 0.76470588
|
|
0.81632653 0.74576271 0.82608696 0.73076923]
|
|
|
|
mean value: 0.7967006927093825
|
|
|
|
key: train_precision
|
|
value: [0.80369515 0.80465116 0.80449438 0.8 0.80444444 0.80733945
|
|
0.78982301 0.80875576 0.77702703 0.81506849]
|
|
|
|
mean value: 0.8015298878310305
|
|
|
|
key: test_recall
|
|
value: [0.73469388 0.7755102 0.75510204 0.79591837 0.78 0.78
|
|
0.8 0.88 0.7755102 0.7755102 ]
|
|
|
|
mean value: 0.7852244897959184
|
|
|
|
key: train_recall
|
|
value: [0.78202247 0.77752809 0.80449438 0.8 0.81531532 0.79279279
|
|
0.80405405 0.79054054 0.7752809 0.80224719]
|
|
|
|
mean value: 0.7944275736410568
|
|
|
|
key: test_roc_auc
|
|
value: [0.78734694 0.7577551 0.80755102 0.80795918 0.82877551 0.76755102
|
|
0.80816327 0.78693878 0.80612245 0.74489796]
|
|
|
|
mean value: 0.7903061224489796
|
|
|
|
key: train_roc_auc
|
|
value: [0.79529052 0.79416945 0.80427422 0.79977477 0.80878125 0.80201437
|
|
0.79528545 0.80201184 0.77640449 0.81011236]
|
|
|
|
mean value: 0.7988118736714243
|
|
|
|
key: test_jcc
|
|
value: [0.63157895 0.61290323 0.66071429 0.67241379 0.69642857 0.62903226
|
|
0.6779661 0.67692308 0.66666667 0.6031746 ]
|
|
|
|
mean value: 0.6527801529944957
|
|
|
|
key: train_jcc
|
|
value: [0.65660377 0.65406427 0.67293233 0.66666667 0.68045113 0.66666667
|
|
0.66233766 0.66603416 0.63419118 0.67870722]
|
|
|
|
mean value: 0.663865505651715
|
|
|
|
MCC on Blind test: 0.53
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03000593 0.03333426 0.03544641 0.04262209 0.0304172 0.02759886
|
|
0.0276804 0.03472829 0.04165053 0.03193879]
|
|
|
|
mean value: 0.03354227542877197
|
|
|
|
key: score_time
|
|
value: [0.01206589 0.01223135 0.01211286 0.01212573 0.0123148 0.01230645
|
|
0.01247859 0.01220012 0.01218987 0.01221323]
|
|
|
|
mean value: 0.01222388744354248
|
|
|
|
key: test_mcc
|
|
value: [0.78197378 0.71597164 0.74087648 0.74833148 0.86006806 0.75574269
|
|
0.87877551 0.76360355 0.85875386 0.84811452]
|
|
|
|
mean value: 0.7952211566132422
|
|
|
|
key: train_mcc
|
|
value: [0.82253009 0.76734816 0.84554941 0.81435405 0.85014535 0.81815045
|
|
0.82880275 0.89396754 0.8271369 0.84602394]
|
|
|
|
mean value: 0.8314008650553109
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 0.83838384 0.86868687 0.85858586 0.92929293 0.86868687
|
|
0.93939394 0.86868687 0.92857143 0.91836735]
|
|
|
|
mean value: 0.8907544836116265
|
|
|
|
key: train_accuracy
|
|
value: [0.90776153 0.87064117 0.9223847 0.89876265 0.92350956 0.90438695
|
|
0.91338583 0.94488189 0.91235955 0.92022472]
|
|
|
|
mean value: 0.9118298555377207
|
|
|
|
key: test_fscore
|
|
value: [0.89320388 0.85964912 0.86021505 0.875 0.93203883 0.88288288
|
|
0.94 0.88495575 0.92631579 0.9245283 ]
|
|
|
|
mean value: 0.8978789621472809
|
|
|
|
key: train_fscore
|
|
value: [0.91350211 0.88557214 0.92078071 0.90816327 0.92656587 0.91099476
|
|
0.91621328 0.94736842 0.9088785 0.92454835]
|
|
|
|
mean value: 0.9162587419108582
|
|
|
|
key: test_precision
|
|
value: [0.85185185 0.75384615 0.90909091 0.77777778 0.90566038 0.80327869
|
|
0.94 0.79365079 0.95652174 0.85964912]
|
|
|
|
mean value: 0.8551327414038019
|
|
|
|
key: train_precision
|
|
value: [0.86083499 0.79464286 0.94131455 0.8317757 0.89004149 0.85127202
|
|
0.88631579 0.90554415 0.94647202 0.87701613]
|
|
|
|
mean value: 0.8785229697373805
|
|
|
|
key: test_recall
|
|
value: [0.93877551 1. 0.81632653 1. 0.96 0.98
|
|
0.94 1. 0.89795918 1. ]
|
|
|
|
mean value: 0.9533061224489796
|
|
|
|
key: train_recall
|
|
value: [0.97303371 1. 0.9011236 1. 0.96621622 0.97972973
|
|
0.9481982 0.99324324 0.8741573 0.97752809]
|
|
|
|
mean value: 0.9613230084016601
|
|
|
|
key: test_roc_auc
|
|
value: [0.88938776 0.84 0.86816327 0.86 0.92897959 0.86755102
|
|
0.93938776 0.86734694 0.92857143 0.91836735]
|
|
|
|
mean value: 0.8907755102040816
|
|
|
|
key: train_roc_auc
|
|
value: [0.90768803 0.8704955 0.92240864 0.89864865 0.92355755 0.90447161
|
|
0.91342494 0.94493623 0.91235955 0.92022472]
|
|
|
|
mean value: 0.9118215406417653
|
|
|
|
key: test_jcc
|
|
value: [0.80701754 0.75384615 0.75471698 0.77777778 0.87272727 0.79032258
|
|
0.88679245 0.79365079 0.8627451 0.85964912]
|
|
|
|
mean value: 0.8159245777315306
|
|
|
|
key: train_jcc
|
|
value: [0.8407767 0.79464286 0.85319149 0.8317757 0.86317907 0.83653846
|
|
0.84538153 0.9 0.83297645 0.85968379]
|
|
|
|
mean value: 0.8458146048420373
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0337882 0.0325408 0.0330236 0.03169346 0.03819156 0.04255176
|
|
0.03328037 0.02651882 0.03501749 0.03868747]
|
|
|
|
mean value: 0.03452935218811035
|
|
|
|
key: score_time
|
|
value: [0.01231146 0.01219511 0.0120821 0.01211023 0.01220703 0.0121212
|
|
0.01211905 0.01218486 0.01206303 0.0121963 ]
|
|
|
|
mean value: 0.012159037590026855
|
|
|
|
key: test_mcc
|
|
value: [0.67265715 0.73171698 0.84477989 0.45794488 0.885171 0.69701925
|
|
0.86710997 0.66607486 0.90267093 0.83743255]
|
|
|
|
mean value: 0.7562577464086364
|
|
|
|
key: train_mcc
|
|
value: [0.78603499 0.80449849 0.8759883 0.45080088 0.84926073 0.82276562
|
|
0.87102657 0.77674596 0.86121018 0.88796336]
|
|
|
|
mean value: 0.7986295090845719
|
|
|
|
key: test_accuracy
|
|
value: [0.82828283 0.85858586 0.91919192 0.68686869 0.93939394 0.84848485
|
|
0.92929293 0.80808081 0.94897959 0.91836735]
|
|
|
|
mean value: 0.8685528756957328
|
|
|
|
key: train_accuracy
|
|
value: [0.88751406 0.8976378 0.93475816 0.67154106 0.91901012 0.90888639
|
|
0.93138358 0.87626547 0.9258427 0.94382022]
|
|
|
|
mean value: 0.8896659546770137
|
|
|
|
key: test_fscore
|
|
value: [0.8045977 0.84090909 0.92307692 0.55072464 0.94339623 0.85148515
|
|
0.93457944 0.84033613 0.95145631 0.92 ]
|
|
|
|
mean value: 0.8560561612132274
|
|
|
|
key: train_fscore
|
|
value: [0.87745098 0.88942892 0.93855932 0.51495017 0.925 0.9034565
|
|
0.93572181 0.88977956 0.93096234 0.94305239]
|
|
|
|
mean value: 0.8748361989406058
|
|
|
|
key: test_precision
|
|
value: [0.92105263 0.94871795 0.87272727 0.95 0.89285714 0.84313725
|
|
0.87719298 0.72463768 0.90740741 0.90196078]
|
|
|
|
mean value: 0.8839691106119966
|
|
|
|
key: train_precision
|
|
value: [0.96495957 0.96825397 0.88777555 0.98726115 0.86046512 0.95949367
|
|
0.87920792 0.80144404 0.87084149 0.95612009]
|
|
|
|
mean value: 0.9135822565523263
|
|
|
|
key: test_recall
|
|
value: [0.71428571 0.75510204 0.97959184 0.3877551 1. 0.86
|
|
1. 1. 1. 0.93877551]
|
|
|
|
mean value: 0.8635510204081632
|
|
|
|
key: train_recall
|
|
value: [0.80449438 0.82247191 0.99550562 0.34831461 1. 0.8536036
|
|
1. 1. 1. 0.93033708]
|
|
|
|
mean value: 0.8754727199109221
|
|
|
|
key: test_roc_auc
|
|
value: [0.82714286 0.85755102 0.91979592 0.68387755 0.93877551 0.84836735
|
|
0.92857143 0.80612245 0.94897959 0.91836735]
|
|
|
|
mean value: 0.8677551020408163
|
|
|
|
key: train_roc_auc
|
|
value: [0.88760755 0.89772244 0.93468975 0.67190505 0.91910112 0.90882427
|
|
0.93146067 0.87640449 0.9258427 0.94382022]
|
|
|
|
mean value: 0.8897378277153558
|
|
|
|
key: test_jcc
|
|
value: [0.67307692 0.7254902 0.85714286 0.38 0.89285714 0.74137931
|
|
0.87719298 0.72463768 0.90740741 0.85185185]
|
|
|
|
mean value: 0.7631036352375002
|
|
|
|
key: train_jcc
|
|
value: [0.78165939 0.80087527 0.88423154 0.34675615 0.86046512 0.82391304
|
|
0.87920792 0.80144404 0.87084149 0.89224138]
|
|
|
|
mean value: 0.794163534168159
|
|
|
|
MCC on Blind test: 0.6
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.26037502 0.24532795 0.24475813 0.24432278 0.24556231 0.24404931
|
|
0.24527717 0.24619651 0.2461195 0.24500632]
|
|
|
|
mean value: 0.24669950008392333
|
|
|
|
key: score_time
|
|
value: [0.01581287 0.01601648 0.01595807 0.01607084 0.01604128 0.01613355
|
|
0.01604629 0.01603842 0.01622629 0.01639223]
|
|
|
|
mean value: 0.01607363224029541
|
|
|
|
key: test_mcc
|
|
value: [0.92226137 0.86023767 0.94115314 0.90369611 0.89914258 0.92213889
|
|
0.96036035 0.885171 0.94053994 0.90267093]
|
|
|
|
mean value: 0.9137371991506337
|
|
|
|
key: train_mcc
|
|
value: [0.97555115 0.96654926 0.96462817 0.97335731 0.97095332 0.9733589
|
|
0.97116999 0.98437429 0.97338769 0.95979293]
|
|
|
|
mean value: 0.9713123026673283
|
|
|
|
key: test_accuracy
|
|
value: [0.95959596 0.92929293 0.96969697 0.94949495 0.94949495 0.95959596
|
|
0.97979798 0.93939394 0.96938776 0.94897959]
|
|
|
|
mean value: 0.9554730983302412
|
|
|
|
key: train_accuracy
|
|
value: [0.98762655 0.98312711 0.98200225 0.98650169 0.98537683 0.98650169
|
|
0.98537683 0.99212598 0.98651685 0.97977528]
|
|
|
|
mean value: 0.9854931054966444
|
|
|
|
key: test_fscore
|
|
value: [0.96078431 0.93069307 0.97029703 0.95145631 0.95049505 0.96153846
|
|
0.98039216 0.94339623 0.97029703 0.95145631]
|
|
|
|
mean value: 0.9570805958118836
|
|
|
|
key: train_fscore
|
|
value: [0.98779134 0.98335183 0.98233996 0.98669623 0.98550725 0.98666667
|
|
0.98557159 0.99217877 0.98669623 0.98 ]
|
|
|
|
mean value: 0.9856799862416676
|
|
|
|
key: test_precision
|
|
value: [0.9245283 0.90384615 0.94230769 0.90740741 0.94117647 0.92592593
|
|
0.96153846 0.89285714 0.94230769 0.90740741]
|
|
|
|
mean value: 0.9249302656072912
|
|
|
|
key: train_precision
|
|
value: [0.97587719 0.97149123 0.96529284 0.97374179 0.97571744 0.97368421
|
|
0.97155361 0.98447894 0.97374179 0.96923077]
|
|
|
|
mean value: 0.9734809816575078
|
|
|
|
key: test_recall
|
|
value: [1. 0.95918367 1. 1. 0.96 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9919183673469387
|
|
|
|
key: train_recall
|
|
value: [1. 0.99550562 1. 1. 0.9954955 1.
|
|
1. 1. 1. 0.99101124]
|
|
|
|
mean value: 0.998201234942808
|
|
|
|
key: test_roc_auc
|
|
value: [0.96 0.92959184 0.97 0.95 0.94938776 0.95918367
|
|
0.97959184 0.93877551 0.96938776 0.94897959]
|
|
|
|
mean value: 0.9554897959183674
|
|
|
|
key: train_roc_auc
|
|
value: [0.98761261 0.98311317 0.98198198 0.98648649 0.9853882 0.98651685
|
|
0.98539326 0.99213483 0.98651685 0.97977528]
|
|
|
|
mean value: 0.9854919526267841
|
|
|
|
key: test_jcc
|
|
value: [0.9245283 0.87037037 0.94230769 0.90740741 0.90566038 0.92592593
|
|
0.96153846 0.89285714 0.94230769 0.90740741]
|
|
|
|
mean value: 0.9180310779367383
|
|
|
|
key: train_jcc
|
|
value: [0.97587719 0.96724891 0.96529284 0.97374179 0.97142857 0.97368421
|
|
0.97155361 0.98447894 0.97374179 0.96078431]
|
|
|
|
mean value: 0.9717832173431541
|
|
|
|
MCC on Blind test: 0.59
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.20892882 0.19680214 0.22129178 0.215662 0.20441914 0.21925974
|
|
0.21086812 0.20510578 0.20530629 0.2027185 ]
|
|
|
|
mean value: 0.2090362310409546
|
|
|
|
key: score_time
|
|
value: [0.01893711 0.02750826 0.03674531 0.03300762 0.03347588 0.04089451
|
|
0.0351193 0.03215599 0.04165864 0.03740096]
|
|
|
|
mean value: 0.033690357208251955
|
|
|
|
key: test_mcc
|
|
value: [0.94115314 0.96039208 0.96039208 0.96039208 0.96036035 0.92213889
|
|
0.92213889 0.94108303 0.9797959 0.9797959 ]
|
|
|
|
mean value: 0.9527642325496656
|
|
|
|
key: train_mcc
|
|
value: [0.99551061 0.99327341 0.9977528 1. 0.99551066 0.99775281
|
|
0.99775281 0.99551066 0.99551567 0.99105127]
|
|
|
|
mean value: 0.9959630693327268
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.97979798 0.97979798 0.97979798 0.97979798 0.95959596
|
|
0.95959596 0.96969697 0.98979592 0.98979592]
|
|
|
|
mean value: 0.9757369614512471
|
|
|
|
key: train_accuracy
|
|
value: [0.99775028 0.99662542 0.99887514 1. 0.99775028 0.99887514
|
|
0.99887514 0.99775028 0.99775281 0.99550562]
|
|
|
|
mean value: 0.997976011425538
|
|
|
|
key: test_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[0.97029703 0.98 0.98 0.98 0.98039216 0.96153846
|
|
0.96153846 0.97087379 0.98989899 0.98989899]
|
|
|
|
mean value: 0.9764437875848385
|
|
|
|
key: train_fscore
|
|
value: [0.99775785 0.99664054 0.99887767 1. 0.99775281 0.99887514
|
|
0.99887514 0.99775281 0.99775785 0.99552573]
|
|
|
|
mean value: 0.9979815524387322
|
|
|
|
key: test_precision
|
|
value: [0.94230769 0.96078431 0.96078431 0.96078431 0.96153846 0.92592593
|
|
0.92592593 0.94339623 0.98 0.98 ]
|
|
|
|
mean value: 0.9541447173289571
|
|
|
|
key: train_precision
|
|
value: [0.99552573 0.99330357 0.99775785 1. 0.9955157 0.99775281
|
|
0.99775281 0.9955157 0.99552573 0.99109131]
|
|
|
|
mean value: 0.9959741195244144
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97 0.98 0.98 0.98 0.97959184 0.95918367
|
|
0.95918367 0.96938776 0.98979592 0.98979592]
|
|
|
|
mean value: 0.9756938775510204
|
|
|
|
key: train_roc_auc
|
|
value: [0.99774775 0.99662162 0.99887387 1. 0.99775281 0.9988764
|
|
0.9988764 0.99775281 0.99775281 0.99550562]
|
|
|
|
mean value: 0.9979760097175827
|
|
|
|
key: test_jcc
|
|
value: [0.94230769 0.96078431 0.96078431 0.96078431 0.96153846 0.92592593
|
|
0.92592593 0.94339623 0.98 0.98 ]
|
|
|
|
mean value: 0.9541447173289571
|
|
|
|
key: train_jcc
|
|
value: [0.99552573 0.99330357 0.99775785 1. 0.9955157 0.99775281
|
|
0.99775281 0.9955157 0.99552573 0.99109131]
|
|
|
|
mean value: 0.9959741195244144
|
|
|
|
MCC on Blind test: 0.63
|
|
|
|
Accuracy on Blind test: 0.91
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.41119599 0.36338758 0.36235762 0.43186092 0.38388443 0.37863398
|
|
0.4725759 0.39496422 0.41371608 0.3934772 ]
|
|
|
|
mean value: 0.4006053924560547
|
|
|
|
key: score_time
|
|
value: [0.0217042 0.02159715 0.02159238 0.04457092 0.02958322 0.03729844
|
|
0.02161241 0.05810499 0.04006124 0.05123425]
|
|
|
|
mean value: 0.034735918045043945
|
|
|
|
key: test_mcc
|
|
value: [0.88543774 0.94115314 0.92226137 0.90369611 1. 0.9035079
|
|
0.94108303 0.81441102 0.92144268 0.90267093]
|
|
|
|
mean value: 0.9135663931799242
|
|
|
|
key: train_mcc
|
|
value: [0.99104115 0.98881381 0.99104115 0.99104115 0.99104133 0.99104133
|
|
0.98659176 0.98881409 0.98660654 0.98660654]
|
|
|
|
mean value: 0.9892638854610041
|
|
|
|
key: test_accuracy
|
|
value: [0.93939394 0.96969697 0.95959596 0.94949495 1. 0.94949495
|
|
0.96969697 0.8989899 0.95918367 0.94897959]
|
|
|
|
mean value: 0.9544526901669759
|
|
|
|
key: train_accuracy
|
|
value: [0.99550056 0.9943757 0.99550056 0.99550056 0.99550056 0.99550056
|
|
0.99325084 0.9943757 0.99325843 0.99325843]
|
|
|
|
mean value: 0.9946021915799851
|
|
|
|
key: test_fscore
|
|
value: [0.94230769 0.97029703 0.96078431 0.95145631 1. 0.95238095
|
|
0.97087379 0.90909091 0.96078431 0.95145631]
|
|
|
|
mean value: 0.9569431618700495
|
|
|
|
key: train_fscore
|
|
value: [0.99552573 0.99441341 0.99552573 0.99552573 0.9955157 0.9955157
|
|
0.99328859 0.9944009 0.99330357 0.99330357]
|
|
|
|
mean value: 0.9946318608481645
|
|
|
|
key: test_precision
|
|
value: [0.89090909 0.94230769 0.9245283 0.90740741 1. 0.90909091
|
|
0.94339623 0.83333333 0.9245283 0.90740741]
|
|
|
|
mean value: 0.918290867064452
|
|
|
|
key: train_precision
|
|
value: [0.99109131 0.98888889 0.99109131 0.99109131 0.99107143 0.99107143
|
|
0.98666667 0.98886414 0.98669623 0.98669623]
|
|
|
|
mean value: 0.9893228958528268
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.94 0.97 0.96 0.95 1. 0.94897959
|
|
0.96938776 0.89795918 0.95918367 0.94897959]
|
|
|
|
mean value: 0.9544489795918367
|
|
|
|
key: train_roc_auc
|
|
value: [0.9954955 0.99436937 0.9954955 0.9954955 0.99550562 0.99550562
|
|
0.99325843 0.99438202 0.99325843 0.99325843]
|
|
|
|
mean value: 0.9946024395181698
|
|
|
|
key: test_jcc
|
|
value: [0.89090909 0.94230769 0.9245283 0.90740741 1. 0.90909091
|
|
0.94339623 0.83333333 0.9245283 0.90740741]
|
|
|
|
mean value: 0.918290867064452
|
|
|
|
key: train_jcc
|
|
value: [0.99109131 0.98888889 0.99109131 0.99109131 0.99107143 0.99107143
|
|
0.98666667 0.98886414 0.98669623 0.98669623]
|
|
|
|
mean value: 0.9893228958528268
|
|
|
|
MCC on Blind test: 0.3
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.08827615 1.06060791 1.04731488 1.04000974 1.04840255 1.0410111
|
|
1.05050111 1.04558849 1.0482564 1.04188418]
|
|
|
|
mean value: 1.0511852502822876
|
|
|
|
key: score_time
|
|
value: [0.01014209 0.00963998 0.00952101 0.00953627 0.00965571 0.00955391
|
|
0.00955701 0.00950766 0.009763 0.00943875]
|
|
|
|
mean value: 0.009631538391113281
|
|
|
|
key: test_mcc
|
|
value: [0.94115314 0.96039208 0.94115314 0.96039208 0.94108303 0.96036035
|
|
0.94108303 0.885171 0.9797959 0.95998366]
|
|
|
|
mean value: 0.9470567397487186
|
|
|
|
key: train_mcc
|
|
value: [1. 0.9977528 1. 1. 0.99775281 1.
|
|
1. 0.99775281 1. 0.99775533]
|
|
|
|
mean value: 0.9991013743863456
|
|
|
|
key: test_accuracy
|
|
value: [0.96969697 0.97979798 0.96969697 0.97979798 0.96969697 0.97979798
|
|
0.96969697 0.93939394 0.98979592 0.97959184]
|
|
|
|
mean value: 0.9726963512677799
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.99887514 1. 1. 0.99887514 1.
|
|
1. 0.99887514 1. 0.9988764 ]
|
|
|
|
mean value: 0.9995501826316654
|
|
|
|
key: test_fscore
|
|
value: [0.97029703 0.98 0.97029703 0.98 0.97087379 0.98039216
|
|
0.97087379 0.94339623 0.98989899 0.98 ]
|
|
|
|
mean value: 0.9736029005398303
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99887767 1. 1. 0.99887514 1.
|
|
1. 0.99887514 1. 0.99887767]
|
|
|
|
mean value: 0.9995505612303512
|
|
|
|
key: test_precision
|
|
value: [0.94230769 0.96078431 0.94230769 0.96078431 0.94339623 0.96153846
|
|
0.94339623 0.89285714 0.98 0.96078431]
|
|
|
|
mean value: 0.9488156383017649
|
|
|
|
key: train_precision
|
|
value: [1. 0.99775785 1. 1. 0.99775281 1.
|
|
1. 0.99775281 1. 0.99775785]
|
|
|
|
mean value: 0.9991021313044792
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.97 0.98 0.97 0.98 0.96938776 0.97959184
|
|
0.96938776 0.93877551 0.98979592 0.97959184]
|
|
|
|
mean value: 0.9726530612244898
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99887387 1. 1. 0.9988764 1.
|
|
1. 0.9988764 1. 0.9988764 ]
|
|
|
|
mean value: 0.999550308735702
|
|
|
|
key: test_jcc
|
|
value: [0.94230769 0.96078431 0.94230769 0.96078431 0.94339623 0.96153846
|
|
0.94339623 0.89285714 0.98 0.96078431]
|
|
|
|
mean value: 0.9488156383017649
|
|
|
|
key: train_jcc
|
|
value: [1. 0.99775785 1. 1. 0.99775281 1.
|
|
1. 0.99775281 1. 0.99775785]
|
|
|
|
mean value: 0.9991021313044792
|
|
|
|
MCC on Blind test: 0.71
|
|
|
|
Accuracy on Blind test: 0.93
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03933692 0.04640722 0.043329 0.05011773 0.04333067 0.05033231
|
|
0.04319024 0.04641628 0.04378629 0.04761744]
|
|
|
|
mean value: 0.04538640975952148
|
|
|
|
key: score_time
|
|
value: [0.01305914 0.01294613 0.01313424 0.01538229 0.01295948 0.01681566
|
|
0.01300764 0.01295853 0.0129261 0.02229738]
|
|
|
|
mean value: 0.014548659324645996
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.97999192 1. 0.9797959 1. ]
|
|
|
|
mean value: 0.9959787812213322
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.98989899 1. 0.98979592 1. ]
|
|
|
|
mean value: 0.9979694908266337
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.99009901 1. 0.98989899 1. ]
|
|
|
|
mean value: 0.997999799979998
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.98039216 1. 0.98 1. ]
|
|
|
|
mean value: 0.9960392156862745
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.98979592 1. 0.98979592 1. ]
|
|
|
|
mean value: 0.9979591836734694
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.98039216 1. 0.98 1. ]
|
|
|
|
mean value: 0.9960392156862745
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02693081 0.0426867 0.0426743 0.04256725 0.03964114 0.04223228
|
|
0.04247332 0.04274869 0.04339147 0.04276395]
|
|
|
|
mean value: 0.04081099033355713
|
|
|
|
key: score_time
|
|
value: [0.01915526 0.01986718 0.01911521 0.01912975 0.01907897 0.01920676
|
|
0.01915097 0.02495766 0.01918554 0.01919532]
|
|
|
|
mean value: 0.0198042631149292
|
|
|
|
key: test_mcc
|
|
value: [0.77795918 0.81977994 0.78197378 0.82254789 0.85871792 0.82623193
|
|
0.89918367 0.80829204 0.91836735 0.80195322]
|
|
|
|
mean value: 0.831500691590235
|
|
|
|
key: train_mcc
|
|
value: [0.8860686 0.90757425 0.88263676 0.89378099 0.89332285 0.89141697
|
|
0.86356278 0.89568737 0.88015527 0.88893422]
|
|
|
|
mean value: 0.8883140062978838
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 0.90909091 0.88888889 0.90909091 0.92929293 0.90909091
|
|
0.94949495 0.8989899 0.95918367 0.89795918]
|
|
|
|
mean value: 0.9139971139971139
|
|
|
|
key: train_accuracy
|
|
value: [0.94263217 0.95275591 0.94038245 0.94600675 0.94600675 0.94488189
|
|
0.93138358 0.94713161 0.93932584 0.94382022]
|
|
|
|
mean value: 0.9434327169777935
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.91089109 0.89320388 0.91262136 0.93069307 0.91588785
|
|
0.94949495 0.90740741 0.95918367 0.90384615]
|
|
|
|
mean value: 0.9172118324708366
|
|
|
|
key: train_fscore
|
|
value: [0.94389439 0.95434783 0.94232862 0.94771242 0.94736842 0.94644809
|
|
0.93274531 0.94852136 0.94104803 0.9452954 ]
|
|
|
|
mean value: 0.9449709872505119
|
|
|
|
key: test_precision
|
|
value: [0.88 0.88461538 0.85185185 0.87037037 0.92156863 0.85964912
|
|
0.95918367 0.84482759 0.95918367 0.85454545]
|
|
|
|
mean value: 0.8885795744786731
|
|
|
|
key: train_precision
|
|
value: [0.92456897 0.92421053 0.91350211 0.91966173 0.92307692 0.91932059
|
|
0.91360691 0.92324094 0.91507431 0.92110874]
|
|
|
|
mean value: 0.9197371754306076
|
|
|
|
key: test_recall
|
|
value: [0.89795918 0.93877551 0.93877551 0.95918367 0.94 0.98
|
|
0.94 0.98 0.95918367 0.95918367]
|
|
|
|
mean value: 0.9493061224489796
|
|
|
|
key: train_recall
|
|
value: [0.96404494 0.98651685 0.97303371 0.97752809 0.97297297 0.97522523
|
|
0.9527027 0.97522523 0.96853933 0.97078652]
|
|
|
|
mean value:/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_cd_7030.py:196: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./embb_cd_7030.py:199: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
0.9716575564328374
|
|
|
|
key: test_roc_auc
|
|
value: [0.88897959 0.90938776 0.88938776 0.90959184 0.92918367 0.90836735
|
|
0.94959184 0.89816327 0.95918367 0.89795918]
|
|
|
|
mean value: 0.9139795918367347
|
|
|
|
key: train_roc_auc
|
|
value: [0.94260806 0.95271789 0.94034568 0.94597125 0.94603705 0.94491598
|
|
0.93140753 0.94716317 0.93932584 0.94382022]
|
|
|
|
mean value: 0.9434312683469986
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.83636364 0.80701754 0.83928571 0.87037037 0.84482759
|
|
0.90384615 0.83050847 0.92156863 0.8245614 ]
|
|
|
|
mean value: 0.8478349510468444
|
|
|
|
key: train_jcc
|
|
value: [0.89375 0.91268191 0.8909465 0.90062112 0.9 0.89834025
|
|
0.87396694 0.90208333 0.88865979 0.89626556]
|
|
|
|
mean value: 0.8957315411177106
|
|
|
|
MCC on Blind test: 0.65
|
|
|
|
Accuracy on Blind test: 0.89
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.32786989 0.35880113 0.34923577 0.35903668 0.35974002 0.47243118
|
|
0.35273027 0.34262276 0.35770893 0.34203053]
|
|
|
|
mean value: 0.36222071647644044
|
|
|
|
key: score_time
|
|
value: [0.01919818 0.01935267 0.01916862 0.01917624 0.01922011 0.01917219
|
|
0.0192337 0.01925063 0.01923895 0.01925302]
|
|
|
|
mean value: 0.019226431846618652
|
|
|
|
key: test_mcc
|
|
value: [0.77795918 0.81977994 0.81977994 0.82254789 0.85871792 0.82623193
|
|
0.96036035 0.80829204 0.91836735 0.80195322]
|
|
|
|
mean value: 0.8413989745527527
|
|
|
|
key: train_mcc
|
|
value: [0.8860686 0.90757425 0.89331004 0.89378099 0.89332285 0.88716246
|
|
0.88561868 0.89568737 0.88015527 0.88893422]
|
|
|
|
mean value: 0.8911614733764517
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 0.90909091 0.90909091 0.90909091 0.92929293 0.90909091
|
|
0.97979798 0.8989899 0.95918367 0.89795918]
|
|
|
|
mean value: 0.919047619047619
|
|
|
|
key: train_accuracy
|
|
value: [0.94263217 0.95275591 0.94600675 0.94600675 0.94600675 0.94263217
|
|
0.94150731 0.94713161 0.93932584 0.94382022]
|
|
|
|
mean value: 0.9447825482488846
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.91089109 0.91089109 0.91262136 0.93069307 0.91588785
|
|
0.98039216 0.90740741 0.95918367 0.90384615]
|
|
|
|
mean value: 0.9220702737689926
|
|
|
|
key: train_fscore
|
|
value: [0.94389439 0.95434783 0.94748359 0.94771242 0.94736842 0.94438386
|
|
0.94360087 0.94852136 0.94104803 0.9452954 ]
|
|
|
|
mean value: 0.9463656169502398
|
|
|
|
key: test_precision
|
|
value: [0.88 0.88461538 0.88461538 0.87037037 0.92156863 0.85964912
|
|
0.96153846 0.84482759 0.95918367 0.85454545]
|
|
|
|
mean value: 0.8920914065619338
|
|
|
|
key: train_precision
|
|
value: [0.92456897 0.92421053 0.92324094 0.91966173 0.92307692 0.9154334
|
|
0.91004184 0.92324094 0.91507431 0.92110874]
|
|
|
|
mean value: 0.9199658321650512
|
|
|
|
key: test_recall
|
|
value: [0.89795918 0.93877551 0.93877551 0.95918367 0.94 0.98
|
|
1. 0.98 0.95918367 0.95918367]
|
|
|
|
mean value: 0.9553061224489796
|
|
|
|
key: train_recall
|
|
value: [0.96404494 0.98651685 0.97303371 0.97752809 0.97297297 0.97522523
|
|
0.97972973 0.97522523 0.96853933 0.97078652]
|
|
|
|
mean value: 0.97436025913554
|
|
|
|
key: test_roc_auc
|
|
value: [0.88897959 0.90938776 0.90938776 0.90959184 0.92918367 0.90836735
|
|
0.97959184 0.89816327 0.95918367 0.89795918]
|
|
|
|
mean value: 0.9189795918367347
|
|
|
|
key: train_roc_auc
|
|
value: [0.94260806 0.95271789 0.94597631 0.94597125 0.94603705 0.94266879
|
|
0.94155026 0.94716317 0.93932584 0.94382022]
|
|
|
|
mean value: 0.9447838850086041
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.83636364 0.83636364 0.83928571 0.87037037 0.84482759
|
|
0.96153846 0.83050847 0.92156863 0.8245614 ]
|
|
|
|
mean value: 0.8565387910664739
|
|
|
|
key: train_jcc
|
|
value: [0.89375 0.91268191 0.9002079 0.90062112 0.9 0.8946281
|
|
0.89322382 0.90208333 0.88865979 0.89626556]
|
|
|
|
mean value: 0.8982121536691379
|
|
|
|
MCC on Blind test: 0.54
|
|
|
|
Accuracy on Blind test: 0.87
|