19607 lines
983 KiB
Text
19607 lines
983 KiB
Text
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_rt.py:550: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 817
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 817
|
|
ncols: 269
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 244
|
|
log10_or_mychisq 244
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 168
|
|
No. of categorical features: 7
|
|
|
|
index: 0
|
|
ind: 1
|
|
|
|
Mask count check: True
|
|
|
|
index: 1
|
|
ind: 2
|
|
|
|
Mask count check: True
|
|
Original Data
|
|
Counter({0: 315, 1: 35}) Data dim: (350, 175)
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data: REVERSE training
|
|
imputed values: training set
|
|
actual values: blind test set
|
|
Train data size: (350, 175)
|
|
Test data size: (467, 175)
|
|
y_train numbers: Counter({0: 315, 1: 35})
|
|
y_train ratio: 9.0
|
|
|
|
y_test_numbers: Counter({1: 309, 0: 158})
|
|
y_test ratio: 0.511326860841424
|
|
-------------------------------------------------------------
|
|
Simple Random OverSampling
|
|
Counter({0: 315, 1: 315})
|
|
(630, 175)
|
|
Simple Random UnderSampling
|
|
Counter({0: 35, 1: 35})
|
|
(70, 175)
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 315, 1: 315})
|
|
(630, 175)
|
|
SMOTE_NC OverSampling
|
|
Counter({0: 315, 1: 315})
|
|
(630, 175)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis: REVERSE training
|
|
|
|
Gene name: katG
|
|
Drug name: isoniazid
|
|
|
|
Output directory: /home/tanu/git/Data/isoniazid/output/ml/tts_rt/
|
|
|
|
Sanity checks:
|
|
Total input features: 175
|
|
|
|
Training data size: (350, 175)
|
|
Test data size: (467, 175)
|
|
|
|
Target feature numbers (training data): Counter({0: 315, 1: 35})
|
|
Target features ratio (training data: 9.0
|
|
|
|
Target feature numbers (test data): Counter({1: 309, 0: 158})
|
|
Target features ratio (test data): 0.511326860841424
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 36
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'mcsm_ppi2_affinity', 'interface_dist']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
These are:
|
|
['ALTS910101', 'AZAE970101', 'AZAE970102', 'BASU010101', 'BENS940101', 'BENS940102', 'BENS940103', 'BENS940104', 'BETM990101', 'BLAJ010101', 'BONM030101', 'BONM030102', 'BONM030103', 'BONM030104', 'BONM030105', 'BONM030106', 'BRYS930101', 'CROG050101', 'CSEM940101', 'DAYM780301', 'DAYM780302', 'DOSZ010101', 'DOSZ010102', 'DOSZ010103', 'DOSZ010104', 'FEND850101', 'FITW660101', 'GEOD900101', 'GIAG010101', 'GONG920101', 'GRAR740104', 'HENS920101', 'HENS920102', 'HENS920103', 'HENS920104', 'JOHM930101', 'JOND920103', 'JOND940101', 'KANM000101', 'KAPO950101', 'KESO980101', 'KESO980102', 'KOLA920101', 'KOLA930101', 'KOSJ950100_RSA_SST', 'KOSJ950100_SST', 'KOSJ950110_RSA', 'KOSJ950115', 'LEVJ860101', 'LINK010101', 'LIWA970101', 'LUTR910101', 'LUTR910102', 'LUTR910103', 'LUTR910104', 'LUTR910105', 'LUTR910106', 'LUTR910107', 'LUTR910108', 'LUTR910109', 'MCLA710101', 'MCLA720101', 'MEHP950102', 'MICC010101', 'MIRL960101', 'MIYS850102', 'MIYS850103', 'MIYS930101', 'MIYS960101', 'MIYS960102', 'MIYS960103', 'MIYS990106', 'MIYS990107', 'MIYT790101', 'MOHR870101', 'MOOG990101', 'MUET010101', 'MUET020101', 'MUET020102', 'NAOD960101', 'NGPC000101', 'NIEK910101', 'NIEK910102', 'OGAK980101', 'OVEJ920100_RSA', 'OVEJ920101', 'OVEJ920102', 'OVEJ920103', 'PRLA000101', 'PRLA000102', 'QUIB020101', 'QU_C930101', 'QU_C930102', 'QU_C930103', 'RIER950101', 'RISJ880101', 'RUSR970101', 'RUSR970102', 'RUSR970103', 'SIMK990101', 'SIMK990102', 'SIMK990103', 'SIMK990104', 'SIMK990105', 'SKOJ000101', 'SKOJ000102', 'SKOJ970101', 'TANS760101', 'TANS760102', 'THOP960101', 'TOBD000101', 'TOBD000102', 'TUDE900101', 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106']
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03331447 0.0305326 0.03297567 0.04060841 0.03268576 0.03099227
|
|
0.05662084 0.05377221 0.05179167 0.0333972 ]
|
|
|
|
mean value: 0.0396691083908081
|
|
|
|
key: score_time
|
|
value: [0.01208329 0.01174116 0.01245928 0.01186728 0.01179862 0.01175976
|
|
0.01649308 0.02123761 0.01188302 0.01174259]
|
|
|
|
mean value: 0.013306570053100587
|
|
|
|
key: test_mcc
|
|
value: [-0.09375 0.56011203 0. -0.0525105 0.56011203 0.53159579
|
|
0. -0.06160411 0.29845644 -0.06160411]
|
|
|
|
mean value: 0.16808075748896564
|
|
|
|
key: train_mcc
|
|
value: [0.51134507 0.27968064 0.336724 0.34647321 0.27968064 0.32717006
|
|
0.43532385 0.4596458 0.36569521 0.35311727]
|
|
|
|
mean value: 0.36948557590565084
|
|
|
|
key: test_accuracy
|
|
value: [0.82857143 0.94285714 0.91428571 0.88571429 0.94285714 0.91428571
|
|
0.88571429 0.85714286 0.88571429 0.85714286]
|
|
|
|
mean value: 0.8914285714285715
|
|
|
|
key: train_accuracy
|
|
value: [0.92698413 0.9047619 0.90793651 0.91111111 0.9047619 0.91111111
|
|
0.92063492 0.92380952 0.91428571 0.91428571]
|
|
|
|
mean value: 0.913968253968254
|
|
|
|
key: test_fscore
|
|
value: [0. 0.5 0. 0. 0.5 0.57142857
|
|
0. 0. 0.33333333 0. ]
|
|
|
|
mean value: 0.1904761904761905
|
|
|
|
key: train_fscore
|
|
value: [0.48888889 0.25 0.3255814 0.3 0.25 0.3
|
|
0.41860465 0.42857143 0.34146341 0.30769231]
|
|
|
|
mean value: 0.34108020862983995
|
|
|
|
key: test_precision
|
|
value: [0. 1. 0. 0. 1. 0.66666667
|
|
0. 0. 0.5 0. ]
|
|
|
|
mean value: 0.31666666666666665
|
|
|
|
key: train_precision
|
|
value: [0.84615385 0.625 0.63636364 0.75 0.625 0.66666667
|
|
0.75 0.81818182 0.7 0.75 ]
|
|
|
|
mean value: 0.7167365967365967
|
|
|
|
key: test_recall
|
|
value: [0. 0.33333333 0. 0. 0.33333333 0.5
|
|
0. 0. 0.25 0. ]
|
|
|
|
mean value: 0.14166666666666666
|
|
|
|
key: train_recall
|
|
value: [0.34375 0.15625 0.21875 0.1875 0.15625 0.19354839
|
|
0.29032258 0.29032258 0.22580645 0.19354839]
|
|
|
|
mean value: 0.22560483870967743
|
|
|
|
key: test_roc_auc
|
|
value: [0.453125 0.66666667 0.5 0.484375 0.66666667 0.73387097
|
|
0.5 0.48387097 0.60887097 0.48387097]
|
|
|
|
mean value: 0.5581317204301075
|
|
|
|
key: train_roc_auc
|
|
value: [0.66834143 0.57282465 0.60230786 0.59021643 0.57282465 0.5914925
|
|
0.6398796 0.64164016 0.60762154 0.59325307]
|
|
|
|
mean value: 0.608040188727257
|
|
|
|
key: test_jcc
|
|
value: [0. 0.33333333 0. 0. 0.33333333 0.4
|
|
0. 0. 0.2 0. ]
|
|
|
|
mean value: 0.12666666666666668
|
|
|
|
key: train_jcc
|
|
value: [0.32352941 0.14285714 0.19444444 0.17647059 0.14285714 0.17647059
|
|
0.26470588 0.27272727 0.20588235 0.18181818]
|
|
|
|
mean value: 0.20817630082335964
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.35
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.73298287 0.80133295 0.99006128 0.78111005 0.78264093 0.92581987
|
|
0.75112128 0.88549256 0.79500484 0.81746364]
|
|
|
|
mean value: 0.8263030290603638
|
|
|
|
key: score_time
|
|
value: [0.01242161 0.01247191 0.01276898 0.01240301 0.0128634 0.01232457
|
|
0.01225853 0.01236439 0.01233482 0.01237226]
|
|
|
|
mean value: 0.012458348274230957
|
|
|
|
key: test_mcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_mcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91428571 0.91428571 0.91428571 0.91428571 0.91428571 0.88571429
|
|
0.88571429 0.88571429 0.88571429 0.88571429]
|
|
|
|
mean value: 0.8999999999999999
|
|
|
|
key: train_accuracy
|
|
value: [0.8984127 0.8984127 0.8984127 0.8984127 0.8984127 0.9015873 0.9015873
|
|
0.9015873 0.9015873 0.9015873]
|
|
|
|
mean value: 0.9
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_fscore
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_precision
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_recall
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
|
|
|
|
mean value: 0.5
|
|
|
|
key: train_roc_auc
|
|
value: [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
|
|
|
|
mean value: 0.5
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_jcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.34
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01412082 0.01137376 0.00953507 0.00931573 0.0093925 0.00938797
|
|
0.00931072 0.0095191 0.00930643 0.00971961]
|
|
|
|
mean value: 0.01009817123413086
|
|
|
|
key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
score_time
|
|
value: [0.01191306 0.00912666 0.00884581 0.0086441 0.00869894 0.00872707
|
|
0.00873566 0.00870252 0.00875473 0.01172304]
|
|
|
|
mean value: 0.00938715934753418
|
|
|
|
key: test_mcc
|
|
value: [0.35355339 0.31506302 0.20728905 0.14731391 0.45226702 0.39144068
|
|
0.43994135 0.21117195 0.18994601 0.36962466]
|
|
|
|
mean value: 0.3077611029467061
|
|
|
|
key: train_mcc
|
|
value: [0.32719066 0.34382254 0.36878901 0.36178345 0.33307766 0.35319752
|
|
0.33781136 0.35751785 0.38397997 0.34213199]
|
|
|
|
mean value: 0.350930202786909
|
|
|
|
key: test_accuracy
|
|
value: [0.65714286 0.6 0.4 0.6 0.77142857 0.65714286
|
|
0.71428571 0.6 0.57142857 0.62857143]
|
|
|
|
mean value: 0.62
|
|
|
|
key: train_accuracy
|
|
value: [0.61269841 0.61269841 0.64761905 0.63809524 0.5968254 0.63174603
|
|
0.60952381 0.66031746 0.67301587 0.61587302]
|
|
|
|
mean value: 0.6298412698412699
|
|
|
|
key: test_fscore
|
|
value: [0.33333333 0.3 0.22222222 0.22222222 0.42857143 0.4
|
|
0.44444444 0.3 0.28571429 0.38095238]
|
|
|
|
mean value: 0.33174603174603173
|
|
|
|
key: train_fscore
|
|
value: [0.33695652 0.34408602 0.36571429 0.35955056 0.33507853 0.34831461
|
|
0.33513514 0.35928144 0.37575758 0.33879781]
|
|
|
|
mean value: 0.3498672493755642
|
|
|
|
key: test_precision
|
|
value: [0.2 0.17647059 0.125 0.13333333 0.27272727 0.25
|
|
0.28571429 0.1875 0.17647059 0.23529412]
|
|
|
|
mean value: 0.20425101858925387
|
|
|
|
key: train_precision
|
|
value: [0.20394737 0.20779221 0.22377622 0.21917808 0.20125786 0.21088435
|
|
0.2012987 0.22058824 0.23134328 0.20394737]
|
|
|
|
mean value: 0.2124013686153943
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.66666667 1. 1.
|
|
1. 0.75 0.75 1. ]
|
|
|
|
mean value: 0.9166666666666666
|
|
|
|
key: train_recall
|
|
value: [0.96875 1. 1. 1. 1. 1.
|
|
1. 0.96774194 1. 1. ]
|
|
|
|
mean value: 0.9936491935483871
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.78125 0.671875 0.63020833 0.875 0.80645161
|
|
0.83870968 0.66532258 0.64919355 0.79032258]
|
|
|
|
mean value: 0.7520833333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.77059408 0.7844523 0.80388693 0.79858657 0.77561837 0.79577465
|
|
0.7834507 0.79725125 0.81866197 0.78697183]
|
|
|
|
mean value: 0.7915248655245046
|
|
|
|
key: test_jcc
|
|
value: [0.2 0.17647059 0.125 0.125 0.27272727 0.25
|
|
0.28571429 0.17647059 0.16666667 0.23529412]
|
|
|
|
mean value: 0.20133435192258722
|
|
|
|
key: train_jcc
|
|
value: [0.20261438 0.20779221 0.22377622 0.21917808 0.20125786 0.21088435
|
|
0.2012987 0.2189781 0.23134328 0.20394737]
|
|
|
|
mean value: 0.2121070563713521
|
|
|
|
MCC on Blind test: 0.53
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00983262 0.00951886 0.0101335 0.00972223 0.00972033 0.00962758
|
|
0.00971341 0.00964165 0.00951076 0.00950766]
|
|
|
|
mean value: 0.009692859649658204
|
|
|
|
key: score_time
|
|
value: [0.00888991 0.00905991 0.00895095 0.00880742 0.00868535 0.00898767
|
|
0.0089345 0.00869703 0.00874686 0.0087595 ]
|
|
|
|
mean value: 0.008851909637451172
|
|
|
|
key: test_mcc
|
|
value: [-0.0525105 0. -0.0525105 0.16666667 -0.10998534 0.10998534
|
|
0.47743186 0.43548387 -0.08843154 0.47743186]
|
|
|
|
mean value: 0.13635617043074624
|
|
|
|
key: train_mcc
|
|
value: [0.24522731 0.24522731 0.22087403 0.14357632 0.20935601 0.21262468
|
|
0.22965647 0.19934826 0.21479649 0.16625926]
|
|
|
|
mean value: 0.20869461204242698
|
|
|
|
key: test_accuracy
|
|
value: [0.88571429 0.91428571 0.88571429 0.82857143 0.8 0.8
|
|
0.91428571 0.88571429 0.82857143 0.91428571]
|
|
|
|
mean value: 0.8657142857142857
|
|
|
|
key: train_accuracy
|
|
value: [0.88888889 0.88888889 0.88888889 0.87301587 0.88571429 0.8952381
|
|
0.88571429 0.89206349 0.88888889 0.88253968]
|
|
|
|
mean value: 0.886984126984127
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0.25 0. 0.22222222
|
|
0.4 0.5 0. 0.4 ]
|
|
|
|
mean value: 0.17722222222222223
|
|
|
|
key: train_fscore
|
|
value: [0.28571429 0.28571429 0.25531915 0.2 0.25 0.23255814
|
|
0.28 0.22727273 0.25531915 0.21276596]
|
|
|
|
mean value: 0.24846636935553312
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0.2 0. 0.2 1. 0.5 0. 1. ]
|
|
|
|
mean value: 0.29
|
|
|
|
key: train_precision
|
|
value: [0.41176471 0.41176471 0.4 0.27777778 0.375 0.41666667
|
|
0.36842105 0.38461538 0.375 0.3125 ]
|
|
|
|
mean value: 0.3733510293456114
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0.33333333 0. 0.25
|
|
0.25 0.5 0. 0.25 ]
|
|
|
|
mean value: 0.15833333333333333
|
|
|
|
key: train_recall
|
|
value: [0.21875 0.21875 0.1875 0.15625 0.1875 0.16129032
|
|
0.22580645 0.16129032 0.19354839 0.16129032]
|
|
|
|
mean value: 0.1871975806451613
|
|
|
|
key: test_roc_auc
|
|
value: [0.484375 0.5 0.484375 0.60416667 0.4375 0.56048387
|
|
0.625 0.71774194 0.46774194 0.625 ]
|
|
|
|
mean value: 0.5506384408602151
|
|
|
|
key: train_roc_auc
|
|
value: [0.59170716 0.59170716 0.57784894 0.5551568 0.57608216 0.56832122
|
|
0.59177647 0.56656065 0.57916856 0.56127896]
|
|
|
|
mean value: 0.5759608069452851
|
|
|
|
key: test_jcc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[0. 0. 0. 0.14285714 0. 0.125
|
|
0.25 0.33333333 0. 0.25 ]
|
|
|
|
mean value: 0.11011904761904762
|
|
|
|
key: train_jcc
|
|
value: [0.16666667 0.16666667 0.14634146 0.11111111 0.14285714 0.13157895
|
|
0.1627907 0.12820513 0.14634146 0.11904762]
|
|
|
|
mean value: 0.14216069064264425
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.36
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00901008 0.01056027 0.01025176 0.00995803 0.01000857 0.01000237
|
|
0.0100596 0.00992966 0.0088861 0.00979424]
|
|
|
|
mean value: 0.009846067428588868
|
|
|
|
key: score_time
|
|
value: [0.05954075 0.01788521 0.01179361 0.01160502 0.01176715 0.015517
|
|
0.01643443 0.01144218 0.01106095 0.01159406]
|
|
|
|
mean value: 0.017864036560058593
|
|
|
|
key: test_mcc
|
|
value: [ 0.36432621 0. -0.0525105 -0.09375 0. -0.06160411
|
|
0.47743186 0.29845644 0.29845644 0. ]
|
|
|
|
mean value: 0.12308063290184254
|
|
|
|
key: train_mcc
|
|
value: [0.35866849 0.37223834 0.42713304 0.40568833 0.25770828 0.40894539
|
|
0.32717006 0.36569521 0.30650645 0.28537553]
|
|
|
|
mean value: 0.35151291274491253
|
|
|
|
key: test_accuracy
|
|
value: [0.91428571 0.91428571 0.88571429 0.82857143 0.91428571 0.85714286
|
|
0.91428571 0.88571429 0.88571429 0.88571429]
|
|
|
|
mean value: 0.8885714285714286
|
|
|
|
key: train_accuracy
|
|
value: [0.91111111 0.91111111 0.91746032 0.91428571 0.9015873 0.91428571
|
|
0.91111111 0.91428571 0.9047619 0.90793651]
|
|
|
|
mean value: 0.9107936507936508
|
|
|
|
key: test_fscore
|
|
value: [0.4 0. 0. 0. 0. 0.
|
|
0.4 0.33333333 0.33333333 0. ]
|
|
|
|
mean value: 0.14666666666666667
|
|
|
|
key: train_fscore
|
|
value: [0.33333333 0.36363636 0.40909091 0.4 0.24390244 0.42553191
|
|
0.3 0.34146341 0.31818182 0.25641026]
|
|
|
|
mean value: 0.33915504492048343
|
|
|
|
key: test_precision
|
|
value: [0.5 0. 0. 0. 0. 0. 1. 0.5 0.5 0. ]
|
|
|
|
mean value: 0.25
|
|
|
|
key: train_precision
|
|
value: [0.7 0.66666667 0.75 0.69230769 0.55555556 0.625
|
|
0.66666667 0.7 0.53846154 0.625 ]
|
|
|
|
mean value: 0.6519658119658119
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0. 0. 0. 0. 0.
|
|
0.25 0.25 0.25 0. ]
|
|
|
|
mean value: 0.10833333333333334
|
|
|
|
key: train_recall
|
|
value: [0.21875 0.25 0.28125 0.28125 0.15625 0.32258065
|
|
0.19354839 0.22580645 0.22580645 0.16129032]
|
|
|
|
mean value: 0.23165322580645162
|
|
|
|
key: test_roc_auc
|
|
value: [0.65104167 0.5 0.484375 0.453125 0.5 0.48387097
|
|
0.625 0.60887097 0.60887097 0.5 ]
|
|
|
|
mean value: 0.5415154569892473
|
|
|
|
key: train_roc_auc
|
|
value: [0.60407465 0.61793286 0.63532465 0.63355786 0.57105786 0.65072694
|
|
0.5914925 0.60762154 0.60233985 0.57536347]
|
|
|
|
mean value: 0.6089492177905
|
|
|
|
key: test_jcc
|
|
value: [0.25 0. 0. 0. 0. 0. 0.25 0.2 0.2 0. ]
|
|
|
|
mean value: 0.09
|
|
|
|
key: train_jcc
|
|
value: [0.2 0.22222222 0.25714286 0.25 0.13888889 0.27027027
|
|
0.17647059 0.20588235 0.18918919 0.14705882]
|
|
|
|
mean value: 0.205712519241931
|
|
|
|
MCC on Blind test: -0.01
|
|
|
|
Accuracy on Blind test: 0.34
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01418018 0.01330853 0.01229477 0.01421261 0.01521015 0.01336908
|
|
0.01467919 0.01427603 0.01356292 0.0151453 ]
|
|
|
|
mean value: 0.014023876190185547
|
|
|
|
key: score_time
|
|
value: [0.00957322 0.01008129 0.01048541 0.01044345 0.01095033 0.01100969
|
|
0.01069927 0.01073241 0.01105809 0.01087928]
|
|
|
|
mean value: 0.010591244697570801
|
|
|
|
key: test_mcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_mcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91428571 0.91428571 0.91428571 0.91428571 0.91428571 0.88571429
|
|
0.88571429 0.88571429 0.88571429 0.88571429]
|
|
|
|
mean value: 0.8999999999999999
|
|
|
|
key: train_accuracy
|
|
value: [0.8984127 0.8984127 0.8984127 0.8984127 0.8984127 0.9015873 0.9015873
|
|
0.9015873 0.9015873 0.9015873]
|
|
|
|
mean value: 0.9
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_fscore
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_precision
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_recall
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
|
|
|
|
mean value: 0.5
|
|
|
|
key: train_roc_auc
|
|
value: [0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]
|
|
|
|
mean value: 0.5
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_jcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.34
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.34538007 1.39908123 1.49655676 1.48094082 1.34476376 1.17290211
|
|
1.32331443 1.18609762 1.3957324 1.2422936 ]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
|
|
mean value: 1.3387062788009643
|
|
|
|
key: score_time
|
|
value: [0.01540971 0.01488876 0.01393461 0.01501346 0.01491404 0.01459074
|
|
0.01448417 0.01443577 0.01422596 0.01412582]
|
|
|
|
mean value: 0.014602303504943848
|
|
|
|
key: test_mcc
|
|
value: [-0.09375 0.56011203 0.36432621 0.27083333 0. 0.21080523
|
|
-0.06160411 -0.08843154 0.29845644 0.62325024]
|
|
|
|
mean value: 0.20839978332321352
|
|
|
|
key: train_mcc
|
|
value: [0.96521643 1. 1. 1. 1. 0.98201138
|
|
1. 1. 1. 0.96381639]
|
|
|
|
mean value: 0.9911044196630064
|
|
|
|
key: test_accuracy
|
|
value: [0.82857143 0.94285714 0.91428571 0.88571429 0.91428571 0.85714286
|
|
0.85714286 0.82857143 0.88571429 0.91428571]
|
|
|
|
mean value: 0.8828571428571428
|
|
|
|
key: train_accuracy
|
|
value: [0.99365079 1. 1. 1. 1. 0.9968254
|
|
1. 1. 1. 0.99365079]
|
|
|
|
mean value: 0.9984126984126984
|
|
|
|
key: test_fscore
|
|
value: [0. 0.5 0.4 0.33333333 0. 0.28571429
|
|
0. 0. 0.33333333 0.66666667]
|
|
|
|
mean value: 0.2519047619047619
|
|
|
|
key: train_fscore
|
|
value: [0.96875 1. 1. 1. 1. 0.98360656
|
|
1. 1. 1. 0.96666667]
|
|
|
|
mean value: 0.9919023224043716
|
|
|
|
key: test_precision
|
|
value: [0. 1. 0.5 0.33333333 0. 0.33333333
|
|
0. 0. 0.5 0.6 ]
|
|
|
|
mean value: 0.32666666666666666
|
|
|
|
key: train_precision
|
|
value: [0.96875 1. 1. 1. 1. 1. 1. 1. 1.
|
|
1. ]
|
|
|
|
mean value: 0.996875
|
|
|
|
key: test_recall
|
|
value: [0. 0.33333333 0.33333333 0.33333333 0. 0.25
|
|
0. 0. 0.25 0.75 ]
|
|
|
|
mean value: 0.225
|
|
|
|
key: train_recall
|
|
value: [0.96875 1. 1. 1. 1. 0.96774194
|
|
1. 1. 1. 0.93548387]
|
|
|
|
mean value: 0.9871975806451613
|
|
|
|
key: test_roc_auc
|
|
value: [0.453125 0.66666667 0.65104167 0.63541667 0.5 0.59274194
|
|
0.48387097 0.46774194 0.60887097 0.84274194]
|
|
|
|
mean value: 0.5902217741935484
|
|
|
|
key: train_roc_auc
|
|
value: [0.98260822 1. 1. 1. 1. 0.98387097
|
|
1. 1. 1. 0.96774194]
|
|
|
|
mean value: 0.993422111877351
|
|
|
|
key: test_jcc
|
|
value: [0. 0.33333333 0.25 0.2 0. 0.16666667
|
|
0. 0. 0.2 0.5 ]
|
|
|
|
mean value: 0.165
|
|
|
|
key: train_jcc
|
|
value: [0.93939394 1. 1. 1. 1. 0.96774194
|
|
1. 1. 1. 0.93548387]
|
|
|
|
mean value: 0.9842619745845552
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.39
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03597021 0.03329563 0.03372788 0.03014588 0.03348446 0.02683616
|
|
0.02680779 0.02815747 0.02972507 0.03359985]
|
|
|
|
mean value: 0.03117504119873047
|
|
|
|
key: score_time
|
|
value: [0.01183224 0.00938368 0.00902939 0.00889587 0.00888062 0.00926757
|
|
0.00925398 0.00914645 0.00899363 0.00932932]
|
|
|
|
mean value: 0.009401273727416993
|
|
|
|
key: test_mcc
|
|
value: [-0.09375 0.36432621 0.53159579 0.10206207 0.27083333 0.04490133
|
|
-0.08843154 0.10998534 0.21080523 0.55144163]
|
|
|
|
mean value: 0.2003769391627726
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.82857143 0.91428571 0.91428571 0.77142857 0.88571429 0.74285714
|
|
0.82857143 0.8 0.85714286 0.88571429]
|
|
|
|
mean value: 0.8428571428571429
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0. 0.4 0.57142857 0.2 0.33333333 0.18181818
|
|
0. 0.22222222 0.28571429 0.6 ]
|
|
|
|
mean value: 0.27945165945165945
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0. 0.5 0.5 0.14285714 0.33333333 0.14285714
|
|
0. 0.2 0.33333333 0.5 ]
|
|
|
|
mean value: 0.2652380952380952
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0.33333333 0.66666667 0.33333333 0.33333333 0.25
|
|
0. 0.25 0.25 0.75 ]
|
|
|
|
mean value: 0.31666666666666665
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.453125 0.65104167 0.80208333 0.57291667 0.63541667 0.52822581
|
|
0.46774194 0.56048387 0.59274194 0.8266129 ]
|
|
|
|
mean value: 0.6090389784946236
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0. 0.25 0.4 0.11111111 0.2 0.1
|
|
0. 0.125 0.16666667 0.42857143]
|
|
|
|
mean value: 0.17813492063492065
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.10471964 0.1002183 0.09712696 0.09745193 0.09763312 0.09675956
|
|
0.09710264 0.09954405 0.10006046 0.09823823]
|
|
|
|
mean value: 0.09888548851013183
|
|
|
|
key: score_time
|
|
value: [0.01852775 0.01747513 0.01741457 0.01715994 0.01719284 0.01720977
|
|
0.01727819 0.0182023 0.01843143 0.0188179 ]
|
|
|
|
mean value: 0.017770981788635253
|
|
|
|
key: test_mcc
|
|
value: [-0.07537784 0.56011203 0. -0.0525105 0. 0.21080523
|
|
0.47743186 -0.08843154 -0.06160411 0.53159579]
|
|
|
|
mean value: 0.1502020921406983
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.94285714 0.91428571 0.88571429 0.91428571 0.85714286
|
|
0.91428571 0.82857143 0.85714286 0.91428571]
|
|
|
|
mean value: 0.8885714285714286
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0. 0.5 0. 0. 0. 0.28571429
|
|
0.4 0. 0. 0.57142857]
|
|
|
|
mean value: 0.17571428571428574
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0. 1. 0. 0. 0. 0.33333333
|
|
1. 0. 0. 0.66666667]
|
|
|
|
mean value: 0.3
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0.33333333 0. 0. 0. 0.25
|
|
0.25 0. 0. 0.5 ]
|
|
|
|
mean value: 0.13333333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.46875 0.66666667 0.5 0.484375 0.5 0.59274194
|
|
0.625 0.46774194 0.48387097 0.73387097]
|
|
|
|
mean value: 0.5523017473118279
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0. 0.33333333 0. 0. 0. 0.16666667
|
|
0.25 0. 0. 0.4 ]
|
|
|
|
mean value: 0.115
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.09
|
|
|
|
Accuracy on Blind test: 0.37
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00993443 0.00951266 0.01008153 0.00948215 0.00960541 0.01048779
|
|
0.00992322 0.01007986 0.00985813 0.01016045]
|
|
|
|
mean value: 0.009912562370300294
|
|
|
|
key: score_time
|
|
value: [0.00870562 0.00859618 0.00887036 0.0086813 0.00854254 0.00895667
|
|
0.00868225 0.00950742 0.00890946 0.00932264]
|
|
|
|
mean value: 0.008877444267272949
|
|
|
|
key: test_mcc
|
|
value: [-0.10998534 0.27083333 0.4023399 -0.07537784 0.21080523 0.10998534
|
|
0.43548387 -0.14664712 0. 0.29845644]
|
|
|
|
mean value: 0.13958938199547216
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.88571429 0.85714286 0.85714286 0.85714286 0.8
|
|
0.88571429 0.74285714 0.88571429 0.88571429]
|
|
|
|
mean value: 0.8457142857142856
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0. 0.33333333 0.44444444 0. 0.28571429 0.22222222
|
|
0.5 0. 0. 0.33333333]
|
|
|
|
mean value: 0.2119047619047619
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0. 0.33333333 0.33333333 0. 0.25 0.2
|
|
0.5 0. 0. 0.5 ]
|
|
|
|
mean value: 0.21166666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0.33333333 0.66666667 0. 0.33333333 0.25
|
|
0.5 0. 0. 0.25 ]
|
|
|
|
mean value: 0.23333333333333334
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.4375 0.63541667 0.77083333 0.46875 0.61979167 0.56048387
|
|
0.71774194 0.41935484 0.5 0.60887097]
|
|
|
|
mean value: 0.5738743279569892
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0. 0.2 0.28571429 0. 0.16666667 0.125
|
|
0.33333333 0. 0. 0.2 ]
|
|
|
|
mean value: 0.13107142857142856
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.42
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.51644635 1.49742889 1.5148592 1.53154373 1.55151343 1.53863645
|
|
1.49726319 1.55899858 1.54065752 1.58198071]
|
|
|
|
mean value: 1.5329328060150147
|
|
|
|
key: score_time
|
|
value: [0.09631395 0.09293795 0.14660478 0.09270954 0.09351659 0.08990788
|
|
0.094733 0.09738302 0.09562755 0.09381151]
|
|
|
|
mean value: 0.09935457706451416
|
|
|
|
key: test_mcc
|
|
value: [-0.0525105 -0.0525105 0. -0.0525105 0. 0.47743186
|
|
0. -0.08843154 0. 0.29845644]
|
|
|
|
mean value: 0.052992524779852085
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.88571429 0.88571429 0.91428571 0.88571429 0.91428571 0.91428571
|
|
0.88571429 0.82857143 0.88571429 0.88571429]
|
|
|
|
mean value: 0.8885714285714286
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0. 0.4
|
|
0. 0. 0. 0.33333333]
|
|
|
|
mean value: 0.07333333333333333
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.5]
|
|
|
|
mean value: 0.15
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0. 0.25 0. 0. 0. 0.25]
|
|
|
|
mean value: 0.05
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.484375 0.484375 0.5 0.484375 0.5 0.625
|
|
0.5 0.46774194 0.5 0.60887097]
|
|
|
|
mean value: 0.5154737903225807
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0. 0.25 0. 0. 0. 0.2 ]
|
|
|
|
mean value: 0.045
|
|
|
|
key: train_jcc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.02
|
|
|
|
Accuracy on Blind test: 0.35
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
|
|
key: fit_time
|
|
value: [1.78996897 0.90552139 0.894943 0.99807549 0.86834431 0.92986178
|
|
0.93068767 0.90419292 0.90186381 0.9327805 ]
|
|
|
|
mean value: 1.005623984336853
|
|
|
|
key: score_time
|
|
value: [0.21278787 0.24770594 0.24243164 0.23456454 0.2313323 0.19642854
|
|
0.14995933 0.17742658 0.23284769 0.21374011]
|
|
|
|
mean value: 0.21392245292663575
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0. 0. -0.0525105 0. 0.
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: -0.005251050315105037
|
|
|
|
key: train_mcc
|
|
value: [0.33726248 0.16782374 0.33726248 0.37767861 0.29160943 0.24194751
|
|
0.34326389 0.29679847 0.34326389 0.24194751]
|
|
|
|
mean value: 0.2978858005359238
|
|
|
|
key: test_accuracy
|
|
value: [0.91428571 0.91428571 0.91428571 0.88571429 0.91428571 0.88571429
|
|
0.88571429 0.88571429 0.88571429 0.88571429]
|
|
|
|
mean value: 0.8971428571428571
|
|
|
|
key: train_accuracy
|
|
value: [0.91111111 0.9015873 0.91111111 0.91428571 0.90793651 0.90793651
|
|
0.91428571 0.91111111 0.91428571 0.90793651]
|
|
|
|
mean value: 0.9101587301587302
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_fscore
|
|
value: [0.22222222 0.06060606 0.22222222 0.27027027 0.17142857 0.12121212
|
|
0.22857143 0.17647059 0.22857143 0.12121212]
|
|
|
|
mean value: 0.18227870345517405
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_recall
|
|
value: [0.125 0.03125 0.125 0.15625 0.09375 0.06451613
|
|
0.12903226 0.09677419 0.12903226 0.06451613]
|
|
|
|
mean value: 0.10151209677419355
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.5 0.484375 0.5 0.5 0.5 0.5
|
|
0.5 0.5 ]
|
|
|
|
mean value: 0.4984375
|
|
|
|
key: train_roc_auc
|
|
value: [0.5625 0.515625 0.5625 0.578125 0.546875 0.53225806
|
|
0.56451613 0.5483871 0.56451613 0.53225806]
|
|
|
|
mean value: 0.5507560483870968
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_jcc
|
|
value: [0.125 0.03125 0.125 0.15625 0.09375 0.06451613
|
|
0.12903226 0.09677419 0.12903226 0.06451613]
|
|
|
|
mean value: 0.10151209677419355
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.34
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0250752 0.01065731 0.01071143 0.01074243 0.01095748 0.0107255
|
|
0.01109242 0.01069355 0.01044393 0.01033878]
|
|
|
|
mean value: 0.012143802642822266
|
|
|
|
key: score_time
|
|
value: [0.01133585 0.00962234 0.00961447 0.00976562 0.00963736 0.00993538
|
|
0.00962138 0.00929904 0.00920677 0.0094161 ]
|
|
|
|
mean value: 0.009745430946350098
|
|
|
|
key: test_mcc
|
|
value: [-0.0525105 0. -0.0525105 0.16666667 -0.10998534 0.10998534
|
|
0.47743186 0.43548387 -0.08843154 0.47743186]
|
|
|
|
mean value: 0.13635617043074624
|
|
|
|
key: train_mcc
|
|
value: [0.24522731 0.24522731 0.22087403 0.14357632 0.20935601 0.21262468
|
|
0.22965647 0.19934826 0.21479649 0.16625926]
|
|
|
|
mean value: 0.20869461204242698
|
|
|
|
key: test_accuracy
|
|
value: [0.88571429 0.91428571 0.88571429 0.82857143 0.8 0.8
|
|
0.91428571 0.88571429 0.82857143 0.91428571]
|
|
|
|
mean value: 0.8657142857142857
|
|
|
|
key: train_accuracy
|
|
value: [0.88888889 0.88888889 0.88888889 0.87301587 0.88571429 0.8952381
|
|
0.88571429 0.89206349 0.88888889 0.88253968]
|
|
|
|
mean value: 0.886984126984127
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0.25 0. 0.22222222
|
|
0.4 0.5 0. 0.4 ]
|
|
|
|
mean value: 0.17722222222222223
|
|
|
|
key: train_fscore
|
|
value: [0.28571429 0.28571429 0.25531915 0.2 0.25 0.23255814
|
|
0.28 0.22727273 0.25531915 0.21276596]
|
|
|
|
mean value: 0.24846636935553312
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0.2 0. 0.2 1. 0.5 0. 1. ]
|
|
|
|
mean value: 0.29
|
|
|
|
key: train_precision
|
|
value: [0.41176471 0.41176471 0.4 0.27777778 0.375 0.41666667
|
|
0.36842105 0.38461538 0.375 0.3125 ]
|
|
|
|
mean value: 0.3733510293456114
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0.33333333 0. 0.25
|
|
0.25 0.5 0. 0.25 ]
|
|
|
|
mean value: 0.15833333333333333
|
|
|
|
key: train_recall
|
|
value: [0.21875 0.21875 0.1875 0.15625 0.1875 0.16129032
|
|
0.22580645 0.16129032 0.19354839 0.16129032]
|
|
|
|
mean value: 0.1871975806451613
|
|
|
|
key: test_roc_auc
|
|
value: [0.484375 0.5 0.484375 0.60416667 0.4375 0.56048387
|
|
0.625 0.71774194 0.46774194 0.625 ]
|
|
|
|
mean value: 0.5506384408602151
|
|
|
|
key: train_roc_auc
|
|
value: [0.59170716 0.59170716 0.57784894 0.5551568 0.57608216 0.56832122
|
|
0.59177647 0.56656065 0.57916856 0.56127896]
|
|
|
|
mean value: 0.5759608069452851
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0.14285714 0. 0.125
|
|
0.25 0.33333333 0. 0.25 ]
|
|
|
|
mean value: 0.11011904761904762
|
|
|
|
key: train_jcc
|
|
value: [0.16666667 0.16666667 0.14634146 0.11111111 0.14285714 0.13157895
|
|
0.1627907 0.12820513 0.14634146 0.11904762]
|
|
|
|
mean value: 0.14216069064264425
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
0.36
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.11506939 0.1129775 0.06830716 0.0669167 0.06651807 0.06604314
|
|
0.06712079 0.07409835 0.06559563 0.07286739]
|
|
|
|
mean value: 0.07755141258239746
|
|
|
|
key: score_time
|
|
value: [0.01184106 0.01083612 0.01109648 0.01035261 0.01036739 0.01035309
|
|
0.01031137 0.01076651 0.0104084 0.01064563]
|
|
|
|
mean value: 0.01069786548614502
|
|
|
|
key: test_mcc
|
|
value: [-0.09375 0.36432621 0.56011203 -0.07537784 0. 0.36661779
|
|
-0.06160411 0. 0.47743186 0.53159579]
|
|
|
|
mean value: 0.2069351729956762
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.82857143 0.91428571 0.94285714 0.85714286 0.91428571 0.85714286
|
|
0.85714286 0.88571429 0.91428571 0.91428571]
|
|
|
|
mean value: 0.8885714285714286
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0. 0.4 0.5 0. 0. 0.44444444
|
|
0. 0. 0.4 0.57142857]
|
|
|
|
mean value: 0.2315873015873016
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0. 0.5 1. 0. 0. 0.4
|
|
0. 0. 1. 0.66666667]
|
|
|
|
mean value: 0.3566666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0.33333333 0.33333333 0. 0. 0.5
|
|
0. 0. 0.25 0.5 ]
|
|
|
|
mean value: 0.19166666666666665
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.453125 0.65104167 0.66666667 0.46875 0.5 0.7016129
|
|
0.48387097 0.5 0.625 0.73387097]
|
|
|
|
mean value: 0.578393817204301
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0. 0.25 0.33333333 0. 0. 0.28571429
|
|
0. 0. 0.25 0.4 ]
|
|
|
|
mean value: 0.1519047619047619
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.03
|
|
|
|
Accuracy on Blind test: 0.36
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.05370188 0.04504299 0.03192163 0.03164124 0.05332661 0.0810895
|
|
0.06681418 0.06451797 0.11487222 0.04296494]
|
|
|
|
mean value: 0.05858931541442871
|
|
|
|
key: score_time
|
|
value: [0.01221609 0.01212215 0.01211476 0.01199985 0.03501606 0.02461076
|
|
0.02228451 0.02241588 0.02496934 0.01233697]
|
|
|
|
mean value: 0.019008636474609375
|
|
|
|
key: test_mcc
|
|
value: [-0.15309311 0.36432621 0.35721725 0.21080523 0.53159579 0.11892066
|
|
0.21080523 -0.16339011 0.15322581 -0.00587058]
|
|
|
|
mean value: 0.162454236697474
|
|
|
|
key: train_mcc
|
|
value: [0.86511367 0.83124878 0.7427984 0.80158038 0.80612728 0.75815226
|
|
0.72786407 0.74263948 0.82110404 0.77084941]
|
|
|
|
mean value: 0.7867477778451822
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.91428571 0.82857143 0.85714286 0.91428571 0.65714286
|
|
0.85714286 0.71428571 0.82857143 0.68571429]
|
|
|
|
mean value: 0.7971428571428572
|
|
|
|
key: train_accuracy
|
|
value: [0.97460317 0.96825397 0.95238095 0.96507937 0.96507937 0.95873016
|
|
0.95238095 0.95555556 0.96825397 0.95873016]
|
|
|
|
mean value: 0.9619047619047619
|
|
|
|
key: test_fscore
|
|
value: [0. 0.4 0.4 0.28571429 0.57142857 0.25
|
|
0.28571429 0. 0.25 0.15384615]
|
|
|
|
mean value: 0.2596703296703297
|
|
|
|
key: train_fscore
|
|
value: [0.87878788 0.84848485 0.76923077 0.81967213 0.82539683 0.77966102
|
|
0.75409836 0.76666667 0.83870968 0.79365079]
|
|
|
|
mean value: 0.8074358968389568
|
|
|
|
key: test_precision
|
|
value: [0. 0.5 0.28571429 0.25 0.5 0.16666667
|
|
0.33333333 0. 0.25 0.11111111]
|
|
|
|
mean value: 0.23968253968253966
|
|
|
|
key: train_precision
|
|
value: [0.85294118 0.82352941 0.75757576 0.86206897 0.83870968 0.82142857
|
|
0.76666667 0.79310345 0.83870968 0.78125 ]
|
|
|
|
mean value: 0.8135983352538103
|
|
|
|
key: test_recall
|
|
value: [0. 0.33333333 0.66666667 0.33333333 0.66666667 0.5
|
|
0.25 0. 0.25 0.25 ]
|
|
|
|
mean value: 0.325
|
|
|
|
key: train_recall
|
|
value: [0.90625 0.875 0.78125 0.78125 0.8125 0.74193548
|
|
0.74193548 0.74193548 0.83870968 0.80645161]
|
|
|
|
mean value: 0.8027217741935484
|
|
|
|
key: test_roc_auc
|
|
value: [0.390625 0.65104167 0.75520833 0.61979167 0.80208333 0.58870968
|
|
0.59274194 0.40322581 0.5766129 0.49596774]
|
|
|
|
mean value: 0.5876008064516128
|
|
|
|
key: train_roc_auc
|
|
value: [0.94429108 0.92689929 0.87649072 0.88355786 0.89741608 0.86216493
|
|
0.8586438 0.86040436 0.91055202 0.89090186]
|
|
|
|
mean value: 0.891132200489498
|
|
|
|
key: test_jcc
|
|
value: [0. 0.25 0.25 0.16666667 0.4 0.14285714
|
|
0.16666667 0. 0.14285714 0.08333333]
|
|
|
|
mean value: 0.16023809523809524
|
|
|
|
key: train_jcc
|
|
value: [0.78378378 0.73684211 0.625 0.69444444 0.7027027 0.63888889
|
|
0.60526316 0.62162162 0.72222222 0.65789474]
|
|
|
|
mean value: 0.6788663663663663
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.46
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01569414 0.01103926 0.01032233 0.01040292 0.00968933 0.01040936
|
|
0.00962996 0.00992632 0.0103271 0.01052594]
|
|
|
|
mean value: 0.010796666145324707
|
|
|
|
key: score_time
|
|
value: [0.01060677 0.010391 0.00973701 0.00962353 0.00962973 0.00949264
|
|
0.00891089 0.00953937 0.00969148 0.00957203]
|
|
|
|
mean value: 0.009719443321228028
|
|
|
|
key: test_mcc
|
|
value: [-0.0525105 0. 0. 0. 0.56011203 0.47743186
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: 0.0985033385776269
|
|
|
|
key: train_mcc
|
|
value: [0.33726248 0.24341294 0.18341512 0.29360174 0.29360174 0.10776887
|
|
0.24808068 0.29912755 0.38400028 0.18706664]
|
|
|
|
mean value: 0.257733804899941
|
|
|
|
key: test_accuracy
|
|
value: [0.88571429 0.91428571 0.91428571 0.91428571 0.94285714 0.91428571
|
|
0.88571429 0.88571429 0.88571429 0.88571429]
|
|
|
|
mean value: 0.9028571428571428
|
|
|
|
key: train_accuracy
|
|
value: [0.91111111 0.9047619 0.9015873 0.90793651 0.90793651 0.9015873
|
|
0.90793651 0.91111111 0.91746032 0.9047619 ]
|
|
|
|
mean value: 0.9076190476190477
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0.5 0.4 0. 0. 0. 0. ]
|
|
|
|
mean value: 0.09
|
|
|
|
key: train_fscore
|
|
value: [0.22222222 0.16666667 0.11428571 0.21621622 0.21621622 0.06060606
|
|
0.17142857 0.22222222 0.31578947 0.11764706]
|
|
|
|
mean value: 0.18233004223716298
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 1. 1. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.2
|
|
|
|
key: train_precision
|
|
value: [1. 0.75 0.66666667 0.8 0.8 0.5
|
|
0.75 0.8 0.85714286 0.66666667]
|
|
|
|
mean value: 0.7590476190476191
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0.33333333 0.25
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: 0.058333333333333334
|
|
|
|
key: train_recall
|
|
value: [0.125 0.09375 0.0625 0.125 0.125 0.03225806
|
|
0.09677419 0.12903226 0.19354839 0.06451613]
|
|
|
|
mean value: 0.10473790322580645
|
|
|
|
key: test_roc_auc
|
|
value: [0.484375 0.5 0.5 0.5 0.66666667 0.625
|
|
0.5 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.5276041666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.5625 0.54510822 0.52948322 0.56073322 0.56073322 0.51436847
|
|
0.54662653 0.56275557 0.59501363 0.5304975 ]
|
|
|
|
mean value: 0.5507819561418437
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0.33333333 0.25
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: 0.058333333333333334
|
|
|
|
key: train_jcc
|
|
value: [0.125 0.09090909 0.06060606 0.12121212 0.12121212 0.03125
|
|
0.09375 0.125 0.1875 0.0625 ]
|
|
|
|
mean value: 0.1018939393939394
|
|
|
|
MCC on Blind test: 0.02
|
|
|
|
Accuracy on Blind test: 0.34
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01274848 0.01570392 0.01490426 0.01520991 0.0152657 0.01427126
|
|
0.0178206 0.01505232 0.0199163 0.01506782]
|
|
|
|
mean value: 0.01559605598449707
|
|
|
|
key: score_time
|
|
value: [0.00968766 0.01123405 0.01129341 0.01162958 0.01156878 0.01167822
|
|
0.01168966 0.01155591 0.01172066 0.01154947]
|
|
|
|
mean value: 0.011360740661621094
|
|
|
|
key: test_mcc
|
|
value: [-0.09375 0.56011203 0. -0.0525105 0.56011203 0.40507022
|
|
-0.06160411 -0.06160411 0.31316438 -0.06160411]
|
|
|
|
mean value: 0.15073858409358087
|
|
|
|
key: train_mcc
|
|
value: [0.75390889 0.5057508 0.37767861 0.33726248 0.45777849 0.64151359
|
|
0.55455707 0. 0.62023554 0.50601984]
|
|
|
|
mean value: 0.4754705309313954
|
|
|
|
key: test_accuracy
|
|
value: [0.82857143 0.94285714 0.91428571 0.88571429 0.94285714 0.8
|
|
0.85714286 0.85714286 0.82857143 0.85714286]
|
|
|
|
mean value: 0.8714285714285714
|
|
|
|
key: train_accuracy
|
|
value: [0.94603175 0.92380952 0.91428571 0.91111111 0.91428571 0.90793651
|
|
0.93333333 0.9015873 0.89206349 0.92698413]
|
|
|
|
mean value: 0.9171428571428571
|
|
|
|
key: test_fscore
|
|
value: [0. 0.5 0. 0. 0.5 0.46153846
|
|
0. 0. 0.4 0. ]
|
|
|
|
mean value: 0.18615384615384617
|
|
|
|
key: train_fscore
|
|
value: [0.77333333 0.52 0.27027027 0.22222222 0.49056604 0.65882353
|
|
0.55319149 0. 0.63043478 0.5106383 ]
|
|
|
|
mean value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
0.46294799628161776
|
|
|
|
key: test_precision
|
|
value: [0. 1. 0. 0. 1. 0.33333333
|
|
0. 0. 0.33333333 0. ]
|
|
|
|
mean value: 0.26666666666666666
|
|
|
|
key: train_precision
|
|
value: [0.6744186 0.72222222 1. 1. 0.61904762 0.51851852
|
|
0.8125 0. 0.47540984 0.75 ]
|
|
|
|
mean value: 0.6572116800505097
|
|
|
|
key: test_recall
|
|
value: [0. 0.33333333 0. 0. 0.33333333 0.75
|
|
0. 0. 0.5 0. ]
|
|
|
|
mean value: 0.19166666666666665
|
|
|
|
key: train_recall
|
|
value: [0.90625 0.40625 0.15625 0.125 0.40625 0.90322581
|
|
0.41935484 0. 0.93548387 0.38709677]
|
|
|
|
mean value: 0.46451612903225803
|
|
|
|
key: test_roc_auc
|
|
value: [0.453125 0.66666667 0.5 0.484375 0.66666667 0.77822581
|
|
0.48387097 0.48387097 0.68548387 0.48387097]
|
|
|
|
mean value: 0.5686155913978495
|
|
|
|
key: train_roc_auc
|
|
value: [0.92839002 0.69429108 0.578125 0.5625 0.68899072 0.90583826
|
|
0.70439573 0.5 0.91140391 0.68650613]
|
|
|
|
mean value: 0.7160440845230966
|
|
|
|
key: test_jcc
|
|
value: [0. 0.33333333 0. 0. 0.33333333 0.3
|
|
0. 0. 0.25 0. ]
|
|
|
|
mean value: 0.12166666666666666
|
|
|
|
key: train_jcc
|
|
value: [0.63043478 0.35135135 0.15625 0.125 0.325 0.49122807
|
|
0.38235294 0. 0.46031746 0.34285714]
|
|
|
|
mean value: 0.32647917484865596
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01607943 0.01803708 0.01566815 0.01678872 0.01595092 0.01609278
|
|
0.01648283 0.01701713 0.01644254 0.01631045]
|
|
|
|
mean value: 0.0164870023727417
|
|
|
|
key: score_time
|
|
value: [0.01201677 0.01281333 0.01589489 0.01166248 0.01199031 0.01169038
|
|
0.01184368 0.01165247 0.01160431 0.01162291]
|
|
|
|
mean value: 0.012279152870178223
|
|
|
|
key: test_mcc
|
|
value: [-0.09375 0.56011203 0. 0.36432621 0. 0.4672925
|
|
0. -0.10998534 0.29845644 0.31316438]
|
|
|
|
mean value: 0.17996162320446454
|
|
|
|
key: train_mcc
|
|
value: [0.4483239 0.1495634 0.32078373 0.65016689 0.26064427 0.62682638
|
|
0.24194751 0.69572259 0.75815226 0.60472846]
|
|
|
|
mean value: 0.4756859386061948
|
|
|
|
key: test_accuracy
|
|
value: [0.82857143 0.94285714 0.91428571 0.91428571 0.91428571 0.74285714
|
|
0.88571429 0.8 0.88571429 0.82857143]
|
|
|
|
mean value: 0.8657142857142857
|
|
|
|
key: train_accuracy
|
|
value: [0.92063492 0.8984127 0.90793651 0.93333333 0.9047619 0.8952381
|
|
0.90793651 0.93650794 0.95873016 0.91428571]
|
|
|
|
mean value: 0.9177777777777778
|
|
|
|
key: test_fscore
|
|
value: [0. 0.5 0. 0.4 0. 0.47058824
|
|
0. 0. 0.33333333 0.4 ]
|
|
|
|
mean value: 0.2103921568627451
|
|
|
|
key: train_fscore
|
|
value: [0.35897436 0.11111111 0.29268293 0.68656716 0.21052632 0.63736264
|
|
0.12121212 0.72222222 0.77966102 0.64 ]
|
|
|
|
mean value: 0.456031987462945
|
|
|
|
key: test_precision
|
|
value: [0. 1. 0. 0.5 0. 0.30769231
|
|
0. 0. 0.5 0.33333333]
|
|
|
|
mean value: 0.2641025641025641
|
|
|
|
key: train_precision
|
|
value: [1. 0.5 0.66666667 0.65714286 0.66666667 0.48333333
|
|
1. 0.63414634 0.82142857 0.54545455]
|
|
|
|
mean value: 0.6974838982156055
|
|
|
|
key: test_recall
|
|
value: [0. 0.33333333 0. 0.33333333 0. 1.
|
|
0. 0. 0.25 0.5 ]
|
|
|
|
mean value: 0.24166666666666667
|
|
|
|
key: train_recall
|
|
value: [0.21875 0.0625 0.1875 0.71875 0.125 0.93548387
|
|
0.06451613 0.83870968 0.74193548 0.77419355]
|
|
|
|
mean value: 0.46673387096774194
|
|
|
|
key: test_roc_auc
|
|
value: [0.453125 0.66666667 0.5 0.65104167 0.5 0.85483871
|
|
0.5 0.4516129 0.60887097 0.68548387]
|
|
|
|
mean value: 0.5871639784946237
|
|
|
|
key: train_roc_auc
|
|
value: [0.609375 0.52771643 0.58844965 0.83817359 0.55896643 0.91316447
|
|
0.53225806 0.89294639 0.86216493 0.85188551]
|
|
|
|
mean value: 0.717510045024507
|
|
|
|
key: test_jcc
|
|
value: [0. 0.33333333 0. 0.25 0. 0.30769231
|
|
0. 0. 0.2 0.25 ]
|
|
|
|
mean value: 0.1341025641025641
|
|
|
|
key: train_jcc
|
|
value: [0.21875 0.05882353 0.17142857 0.52272727 0.11764706 0.46774194
|
|
0.06451613 0.56521739 0.63888889 0.47058824]
|
|
|
|
mean value: 0.32963290123946215
|
|
|
|
MCC on Blind test: 0.03
|
|
|
|
Accuracy on Blind test: 0.34
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.16216946 0.14904571 0.14963889 0.15114594 0.1511097 0.14998603
|
|
0.14638686 0.1447916 0.1499176 0.1471076 ]
|
|
|
|
mean value: 0.15012993812561035
|
|
|
|
key: score_time
|
|
value: [0.01642919 0.01623583 0.0163157 0.01644063 0.01605439 0.01627493
|
|
0.0161252 0.01544738 0.01584053 0.0149405 ]
|
|
|
|
mean value: 0.016010427474975587
|
|
|
|
key: test_mcc
|
|
value: [-0.10998534 -0.0525105 0.45833333 0.27083333 0.27083333 -0.16339011
|
|
-0.06160411 -0.06160411 0.15322581 0.55144163]
|
|
|
|
mean value: 0.1255573266344062
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8 0.88571429 0.88571429 0.88571429 0.88571429 0.71428571
|
|
0.85714286 0.85714286 0.82857143 0.88571429]
|
|
|
|
mean value: 0.8485714285714285
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0.5 0.33333333 0.33333333 0.
|
|
0. 0. 0.25 0.6 ]
|
|
|
|
mean value: 0.20166666666666666
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0.4 0.33333333 0.33333333 0.
|
|
0. 0. 0.25 0.5 ]
|
|
|
|
mean value: 0.18166666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0.66666667 0.33333333 0.33333333 0.
|
|
0. 0. 0.25 0.75 ]
|
|
|
|
mean value: 0.23333333333333334
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.4375 0.484375 0.78645833 0.63541667 0.63541667 0.40322581
|
|
0.48387097 0.48387097 0.5766129 0.8266129 ]
|
|
|
|
mean value: 0.5753360215053763
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0.33333333 0.2 0.2 0.
|
|
0. 0. 0.14285714 0.42857143]
|
|
|
|
mean value: 0.13047619047619047
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.41
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07412291 0.06645656 0.08256459 0.09082556 0.07283092 0.07200861
|
|
0.07845521 0.06757045 0.08594871 0.07623577]
|
|
|
|
mean value: 0.0767019271850586
|
|
|
|
key: score_time
|
|
value: [0.02479577 0.0234735 0.0391748 0.04067969 0.02497649 0.02982092
|
|
0.02192354 0.02468419 0.03167963 0.02793384]
|
|
|
|
mean value: 0.028914237022399904
|
|
|
|
key: test_mcc
|
|
value: [-0.07537784 0.36432621 0. 0.27083333 0.27083333 0.47743186
|
|
0. -0.06160411 0. 0.68534442]
|
|
|
|
mean value: 0.1931787200348778
|
|
|
|
key: train_mcc
|
|
value: [0.94696562 0.91055014 0.92887291 0.89194775 0.92887291 0.86923549
|
|
0.86923549 0.90785413 0.90785413 0.86912525]
|
|
|
|
mean value: 0.9030513824380114
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.91428571 0.91428571 0.88571429 0.88571429 0.91428571
|
|
0.88571429 0.85714286 0.88571429 0.94285714]
|
|
|
|
mean value: 0.8942857142857142
|
|
|
|
key: train_accuracy
|
|
value: [0.99047619 0.98412698 0.98730159 0.98095238 0.98730159 0.97777778
|
|
0.97777778 0.98412698 0.98412698 0.97777778]
|
|
|
|
mean value: 0.9831746031746031
|
|
|
|
key: test_fscore
|
|
value: [0. 0.4 0. 0.33333333 0.33333333 0.4
|
|
0. 0. 0. 0.66666667]
|
|
|
|
mean value: 0.21333333333333332
|
|
|
|
key: train_fscore
|
|
value: [0.95081967 0.91525424 0.93333333 0.9 0.93333333 0.87272727
|
|
0.87272727 0.9122807 0.9122807 0.87719298]
|
|
|
|
mean value: 0.9079949507505407
|
|
|
|
key: test_precision
|
|
value: [0. 0.5 0. 0.33333333 0.33333333 1.
|
|
0. 0. 0. 1. ]
|
|
|
|
mean value: 0.31666666666666665
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 0.96428571 1. 1.
|
|
1. 1. 1. 0.96153846]
|
|
|
|
mean value: 0.9925824175824176
|
|
|
|
key: test_recall
|
|
value: [0. 0.33333333 0. 0.33333333 0.33333333 0.25
|
|
0. 0. 0. 0.5 ]
|
|
|
|
mean value: 0.175
|
|
|
|
key: train_recall
|
|
value: [0.90625 0.84375 0.875 0.84375 0.875 0.77419355
|
|
0.77419355 0.83870968 0.83870968 0.80645161]
|
|
|
|
mean value: 0.837600806451613
|
|
|
|
key: test_roc_auc
|
|
value: [0.46875 0.65104167 0.5 0.63541667 0.63541667 0.625
|
|
0.5 0.48387097 0.5 0.75 ]
|
|
|
|
mean value: 0.5749495967741935
|
|
|
|
key: train_roc_auc
|
|
value: [0.953125 0.921875 0.9375 0.92010822 0.9375 0.88709677
|
|
0.88709677 0.91935484 0.91935484 0.90146524]
|
|
|
|
mean value: 0.9184476684425487
|
|
|
|
key: test_jcc
|
|
value: [0. 0.25 0. 0.2 0.2 0.25 0. 0. 0. 0.5 ]
|
|
|
|
mean value: 0.14
|
|
|
|
key: train_jcc
|
|
value: [0.90625 0.84375 0.875 0.81818182 0.875 0.77419355
|
|
0.77419355 0.83870968 0.83870968 0.78125 ]
|
|
|
|
mean value: 0.8325238269794721
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.4
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08282042 0.10109305 0.09646273 0.07496476 0.06626391 0.07064819
|
|
0.10043478 0.09072304 0.11865139 0.12150764]
|
|
|
|
mean value: 0.0923569917678833
|
|
|
|
key: score_time
|
|
value: [0.02118278 0.02127814 0.02359033 0.01429105 0.02511001 0.02865458
|
|
0.02759981 0.0214169 0.02571201 0.02672887]
|
|
|
|
mean value: 0.023556447029113768
|
|
|
|
key: test_mcc
|
|
value: [-0.07537784 0. -0.0525105 -0.0525105 0. 0.47743186
|
|
0.47743186 -0.08843154 -0.06160411 0.29845644]
|
|
|
|
mean value: 0.0922885659757876
|
|
|
|
key: train_mcc
|
|
value: [0.87315076 0.87315076 0.91055014 0.89198178 0.87315076 0.92675292
|
|
0.88868803 0.84947542 0.90785413 0.88868803]
|
|
|
|
mean value: 0.888344270331561
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.91428571 0.88571429 0.88571429 0.91428571 0.91428571
|
|
0.91428571 0.82857143 0.85714286 0.88571429]
|
|
|
|
mean value: 0.8857142857142857
|
|
|
|
key: train_accuracy
|
|
value: [0.97777778 0.97777778 0.98412698 0.98095238 0.97777778 0.98730159
|
|
0.98095238 0.97460317 0.98412698 0.98095238]
|
|
|
|
mean value: 0.9806349206349206
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0. 0.4
|
|
0.4 0. 0. 0.33333333]
|
|
|
|
mean value: 0.11333333333333334
|
|
|
|
key: train_fscore
|
|
value:/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[0.87719298 0.87719298 0.91525424 0.89655172 0.87719298 0.93103448
|
|
0.89285714 0.85185185 0.9122807 0.89285714]
|
|
|
|
mean value: 0.8924266230873632
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 0. 1. 1. 0. 0. 0.5]
|
|
|
|
mean value: 0.25
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0. 0.25 0.25 0. 0. 0.25]
|
|
|
|
mean value: 0.075
|
|
|
|
key: train_recall
|
|
value: [0.78125 0.78125 0.84375 0.8125 0.78125 0.87096774
|
|
0.80645161 0.74193548 0.83870968 0.80645161]
|
|
|
|
mean value: 0.8064516129032258
|
|
|
|
key: test_roc_auc
|
|
value: [0.46875 0.5 0.484375 0.484375 0.5 0.625
|
|
0.625 0.46774194 0.48387097 0.60887097]
|
|
|
|
mean value: 0.5247983870967742
|
|
|
|
key: train_roc_auc
|
|
value: [0.890625 0.890625 0.921875 0.90625 0.890625 0.93548387
|
|
0.90322581 0.87096774 0.91935484 0.90322581]
|
|
|
|
mean value: 0.9032258064516129
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0. 0.25 0.25 0. 0. 0.2 ]
|
|
|
|
mean value: 0.07
|
|
|
|
key: train_jcc
|
|
value: [0.78125 0.78125 0.84375 0.8125 0.78125 0.87096774
|
|
0.80645161 0.74193548 0.83870968 0.80645161]
|
|
|
|
mean value: 0.8064516129032258
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.34
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.53162169 0.51689434 0.51672387 0.51951933 0.52361584 0.5178957
|
|
0.52500558 0.52535439 0.51458359 0.51455021]
|
|
|
|
mean value: 0.5205764532089233
|
|
|
|
key: score_time
|
|
value: [0.00971007 0.00924444 0.00911212 0.00982213 0.00964141 0.00939989
|
|
0.01016474 0.00928998 0.00959373 0.0092063 ]
|
|
|
|
mean value: 0.00951848030090332
|
|
|
|
key: test_mcc
|
|
value: [-0.07537784 0.27083333 0.36432621 0.27083333 0.36432621 -0.12903226
|
|
0. -0.08843154 0. 0.53159579]
|
|
|
|
mean value: 0.15090732428159676
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.88571429 0.91428571 0.88571429 0.91428571 0.77142857
|
|
0.88571429 0.82857143 0.88571429 0.91428571]
|
|
|
|
mean value: 0.8742857142857142
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0. 0.33333333 0.4 0.33333333 0.4 0.
|
|
0. 0. 0. 0.57142857]
|
|
|
|
mean value: 0.20380952380952383
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0. 0.33333333 0.5 0.33333333 0.5 0.
|
|
0. 0. 0. 0.66666667]
|
|
|
|
mean value: 0.23333333333333334
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0.33333333 0.33333333 0.33333333 0.33333333 0.
|
|
0. 0. 0. 0.5 ]
|
|
|
|
mean value: 0.18333333333333332
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.46875 0.63541667 0.65104167 0.63541667 0.65104167 0.43548387
|
|
0.5 0.46774194 0.5 0.73387097]
|
|
|
|
mean value: 0.5678763440860215
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0. 0.2 0.25 0.2 0.25 0. 0. 0. 0. 0.4 ]
|
|
|
|
mean value: 0.13
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.41
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02460027 0.03584552 0.03349304 0.03625512 0.03576422 0.03091216
|
|
0.02699423 0.02543378 0.0254457 0.0235455 ]
|
|
|
|
mean value: 0.029828953742980956
|
|
|
|
key: score_time
|
|
value: [0.01228023 0.01229644 0.01371884 0.01243496 0.01492405 0.0151546
|
|
0.01238275 0.01226592 0.0144937 0.01463318]
|
|
|
|
mean value: 0.013458466529846192
|
|
|
|
key: test_mcc
|
|
value: [-0.09375 -0.09375 -0.0525105 -0.09375 -0.07537784 -0.16339011
|
|
-0.08843154 -0.06160411 -0.08843154 -0.08843154]
|
|
|
|
mean value: -0.08994271763295655
|
|
|
|
key: train_mcc
|
|
value: [0. 0. 0. 0. 0. 0.
|
|
0. 0. 0. 0.17081008]
|
|
|
|
mean value: 0.01708100811716764
|
|
|
|
key: test_accuracy
|
|
value: [0.82857143 0.82857143 0.88571429 0.82857143 0.85714286 0.71428571
|
|
0.82857143 0.85714286 0.82857143 0.82857143]
|
|
|
|
mean value: 0.8285714285714286
|
|
|
|
key: train_accuracy
|
|
value: [0.8984127 0.8984127 0.8984127 0.8984127 0.8984127 0.9015873 0.9015873
|
|
0.9015873 0.9015873 0.9047619]
|
|
|
|
mean value: 0.9003174603174603
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_fscore
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.0625]
|
|
|
|
mean value: 0.00625
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_precision
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
|
|
|
|
mean value: 0.1
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_recall
|
|
value: [0. 0. 0. 0. 0. 0.
|
|
0. 0. 0. 0.03225806]
|
|
|
|
mean value: 0.0032258064516129032
|
|
|
|
key: test_roc_auc
|
|
value: [0.453125 0.453125 0.484375 0.453125 0.46875 0.40322581
|
|
0.46774194 0.48387097 0.46774194 0.46774194]
|
|
|
|
mean value: 0.4602822580645161
|
|
|
|
key: train_roc_auc
|
|
value: [0.5 0.5 0.5 0.5 0.5 0.5
|
|
0.5 0.5 0.5 0.51612903]
|
|
|
|
mean value: 0.5016129032258064
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_jcc
|
|
value: [0. 0. 0. 0. 0. 0.
|
|
0. 0. 0. 0.03225806]
|
|
|
|
mean value: 0.0032258064516129032
|
|
|
|
MCC on Blind test: -0.07
|
|
|
|
Accuracy on Blind test: 0.34
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02467799 0.03291249 0.0359807 0.03596425 0.01450634 0.01450062
|
|
0.0162096 0.02246547 0.02265882 0.03631139]
|
|
|
|
mean value: 0.025618767738342284
|
|
|
|
key: score_time
|
|
value: [0.0223515 0.0209887 0.0218749 0.02213621 0.01200104 0.01184464
|
|
0.01825714 0.01187491 0.02366185 0.02247262]
|
|
|
|
mean value: 0.018746352195739745
|
|
|
|
key: test_mcc
|
|
value: [-0.09375 0.56011203 0. -0.0525105 0.56011203 0.21080523
|
|
0. -0.06160411 0.29845644 0.47743186]
|
|
|
|
mean value: 0.18990529764599312
|
|
|
|
key: train_mcc
|
|
value: [0.61727597 0.51693921 0.48838221 0.51134507 0.45120653 0.46724374
|
|
0.5266684 0.54667343 0.5266684 0.4596458 ]
|
|
|
|
mean value: 0.511204875608433
|
|
|
|
key: test_accuracy
|
|
value: [0.82857143 0.94285714 0.91428571 0.88571429 0.94285714 0.85714286
|
|
0.88571429 0.85714286 0.88571429 0.91428571]
|
|
|
|
mean value: 0.8914285714285715
|
|
|
|
key: train_accuracy
|
|
value: [0.93968254 0.92698413 0.92380952 0.92698413 0.92063492 0.92380952
|
|
0.93015873 0.93333333 0.93015873 0.92380952]
|
|
|
|
mean value: 0.927936507936508
|
|
|
|
key: test_fscore
|
|
value: [0. 0.5 0. 0. 0.5 0.28571429
|
|
0. 0. 0.33333333 0.4 ]
|
|
|
|
mean value: 0.20190476190476192
|
|
|
|
key: train_fscore
|
|
value: [0.6122449 0.5106383 0.47826087 0.48888889 0.41860465 0.45454545
|
|
0.52173913 0.51162791 0.52173913 0.42857143]
|
|
|
|
mean value: 0.4946860656411614
|
|
|
|
key: test_precision
|
|
value: [0. 1. 0. 0. 1. 0.33333333
|
|
0. 0. 0.5 1. ]
|
|
|
|
mean value: 0.3833333333333333
|
|
|
|
key: train_precision
|
|
value: [0.88235294 0.8 0.78571429 0.84615385 0.81818182 0.76923077
|
|
0.8 0.91666667 0.8 0.81818182]
|
|
|
|
mean value: 0.8236482145305675
|
|
|
|
key: test_recall
|
|
value: [0. 0.33333333 0. 0. 0.33333333 0.25
|
|
0. 0. 0.25 0.25 ]
|
|
|
|
mean value: 0.14166666666666666
|
|
|
|
key: train_recall
|
|
value: [0.46875 0.375 0.34375 0.34375 0.28125 0.32258065
|
|
0.38709677 0.35483871 0.38709677 0.29032258]
|
|
|
|
mean value: 0.3554435483870968
|
|
|
|
key: test_roc_auc
|
|
value: [0.453125 0.66666667 0.5 0.484375 0.66666667 0.59274194
|
|
0.5 0.48387097 0.60887097 0.625 ]
|
|
|
|
mean value: 0.5581317204301075
|
|
|
|
key: train_roc_auc
|
|
value: [0.73084143 0.68219965 0.66657465 0.66834143 0.63709143 0.65600863
|
|
0.6882667 0.67565879 0.6882667 0.64164016]
|
|
|
|
mean value: 0.6734889567944542
|
|
|
|
key: test_jcc
|
|
value: [0. 0.33333333 0. 0. 0.33333333 0.16666667
|
|
0. 0. 0.2 0.25 ]
|
|
|
|
mean value: 0.12833333333333333
|
|
|
|
key: train_jcc
|
|
value: [0.44117647 0.34285714 0.31428571 0.32352941 0.26470588 0.29411765
|
|
0.35294118 0.34375 0.35294118 0.27272727]
|
|
|
|
mean value: 0.33030318945760123
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.37
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.24324369 0.23307657 0.16634679 0.23960328 0.36325216 0.29051638
|
|
0.26582289 0.15332031 0.27652693 0.33790112]
|
|
|
|
mean value: 0.2569610118865967
|
|
|
|
key: score_time
|
|
value: [0.0208447 0.01261544 0.02166581 0.02250195 0.02178288 0.021734
|
|
0.02233982 0.01227593 0.02705884 0.02246189]
|
|
|
|
mean value: 0.020528125762939452
|
|
|
|
key: test_mcc
|
|
value: [-0.07537784 0. 0. -0.0525105 0. 0.
|
|
0. 0. 0. 0. ]
|
|
|
|
mean value: -0.012788833929549128
|
|
|
|
key: train_mcc
|
|
value: [0.33726248 0.16782374 0.10541887 0.16782374 0. 0.17081008
|
|
0.18706664 0.24808068 0.24194751 0. ]
|
|
|
|
mean value: 0.1626233747099756
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.91428571 0.91428571 0.88571429 0.91428571 0.88571429
|
|
0.88571429 0.88571429 0.88571429 0.88571429]
|
|
|
|
mean value: 0.8914285714285713
|
|
|
|
key: train_accuracy
|
|
value: [0.91111111 0.9015873 0.8984127 0.9015873 0.8984127 0.9047619
|
|
0.9047619 0.90793651 0.90793651 0.9015873 ]
|
|
|
|
mean value: 0.9038095238095238
|
|
|
|
key: test_fscore
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_rt.py:114: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_rt.py:117: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[0.22222222 0.06060606 0.05882353 0.06060606 0. 0.0625
|
|
0.11764706 0.17142857 0.12121212 0. ]
|
|
|
|
mean value: 0.08750456243103301
|
|
|
|
key: test_precision
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.5 1. 0. 1.
|
|
0.66666667 0.75 1. 0. ]
|
|
|
|
mean value: 0.6916666666666667
|
|
|
|
key: test_recall
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_recall
|
|
value: [0.125 0.03125 0.03125 0.03125 0. 0.03225806
|
|
0.06451613 0.09677419 0.06451613 0. ]
|
|
|
|
mean value: 0.04768145161290323
|
|
|
|
key: test_roc_auc
|
|
value: [0.46875 0.5 0.5 0.484375 0.5 0.5 0.5 0.5
|
|
0.5 0.5 ]
|
|
|
|
mean value: 0.4953125
|
|
|
|
key: train_roc_auc
|
|
value: [0.5625 0.515625 0.51385822 0.515625 0.5 0.51612903
|
|
0.5304975 0.54662653 0.53225806 0.5 ]
|
|
|
|
mean value: 0.5233119346851656
|
|
|
|
key: test_jcc
|
|
value: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
|
|
|
|
mean value: 0.0
|
|
|
|
key: train_jcc
|
|
value: [0.125 0.03125 0.03030303 0.03125 0. 0.03225806
|
|
0.0625 0.09375 0.06451613 0. ]
|
|
|
|
mean value: 0.04708272238514174
|
|
|
|
MCC on Blind test: 0.03
|
|
|
|
Accuracy on Blind test: 0.34
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.046731 0.03823352 0.0418098 0.03778481 0.07193589 0.04524374
|
|
0.03778577 0.03792214 0.07436538 0.0377152 ]
|
|
|
|
mean value: 0.04695272445678711
|
|
|
|
key: score_time
|
|
value: [0.01246572 0.01210189 0.01210809 0.01457357 0.01514077 0.01474285
|
|
0.01471972 0.01477337 0.01217771 0.01599526]
|
|
|
|
mean value: 0.013879895210266113
|
|
|
|
key: test_mcc
|
|
value: [0.84530217 0.93844649 0.87298387 0.90900317 0.96875 0.77800241
|
|
0.93832585 0.85168687 0.78094752 0.82408564]
|
|
|
|
mean value: 0.8707534011721779
|
|
|
|
key: train_mcc
|
|
value: [0.91386104 0.89640037 0.90347357 0.90678961 0.89264674 0.90640242
|
|
0.903067 0.89642565 0.903067 0.91646899]
|
|
|
|
mean value: 0.9038602395938505
|
|
|
|
key: test_accuracy
|
|
value: [0.92063492 0.96825397 0.93650794 0.95238095 0.98412698 0.88888889
|
|
0.96825397 0.92063492 0.88888889 0.9047619 ]
|
|
|
|
mean value: 0.9333333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.95590829 0.94708995 0.95061728 0.95238095 0.94532628 0.95238095
|
|
0.95061728 0.94708995 0.95061728 0.95767196]
|
|
|
|
mean value: 0.9509700176366843
|
|
|
|
key: test_fscore
|
|
value: [0.92307692 0.96875 0.93548387 0.95384615 0.98412698 0.89230769
|
|
0.96969697 0.92753623 0.89552239 0.91428571]
|
|
|
|
mean value: 0.9364632928251938
|
|
|
|
key: train_fscore
|
|
value: [0.95741056 0.94897959 0.95238095 0.95400341 0.9471891 0.95368782
|
|
0.95205479 0.94880546 0.95205479 0.95862069]
|
|
|
|
mean value: 0.9525187171716681
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.93939394 0.93548387 0.91176471 0.96875 0.87878788
|
|
0.94117647 0.86486486 0.85714286 0.84210526]
|
|
|
|
mean value: 0.9021822791962235
|
|
|
|
key: train_precision
|
|
value: [0.92739274 0.91776316 0.92105263 0.92409241 0.91749175 0.92666667
|
|
0.92358804 0.91749175 0.92358804 0.93602694]
|
|
|
|
mean value: 0.9235154118766192
|
|
|
|
key: test_recall
|
|
value: [0.96774194 1. 0.93548387 1. 1. 0.90625
|
|
1. 1. 0.9375 1. ]
|
|
|
|
mean value: 0.9746975806451613
|
|
|
|
key: train_recall
|
|
value: [0.98943662 0.98239437 0.98591549 0.98591549 0.97887324 0.98233216
|
|
0.98233216 0.98233216 0.98233216 0.98233216]
|
|
|
|
mean value: 0.9834195988652765
|
|
|
|
key: test_roc_auc
|
|
value: [0.92137097 0.96875 0.93649194 0.953125 0.984375 0.88860887
|
|
0.96774194 0.91935484 0.88810484 0.90322581]
|
|
|
|
mean value: 0.9331149193548387
|
|
|
|
key: train_roc_auc
|
|
value: [0.95584905 0.94702757 0.95055492 0.9523217 0.94526701 0.95243368
|
|
0.95067312 0.94715199 0.95067312 0.95771537]
|
|
|
|
mean value: 0.9509667545911511
|
|
|
|
key: test_jcc
|
|
value: [0.85714286 0.93939394 0.87878788 0.91176471 0.96875 0.80555556
|
|
0.94117647 0.86486486 0.81081081 0.84210526]
|
|
|
|
mean value: 0.8820352346184389
|
|
|
|
key: train_jcc
|
|
value: [0.91830065 0.90291262 0.90909091 0.91205212 0.89967638 0.91147541
|
|
0.90849673 0.9025974 0.90849673 0.9205298 ]
|
|
|
|
mean value: 0.9093628754523537
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.47
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.92849064 1.0602355 0.92951155 1.13837886 0.95118618 1.06895828
|
|
0.8453784 0.89579535 1.1051271 0.96467185]
|
|
|
|
mean value: 0.9887733697891236
|
|
|
|
key: score_time
|
|
value: [0.01613951 0.01567912 0.01208901 0.02056813 0.01270127 0.03346825
|
|
0.01612878 0.01232648 0.01497793 0.01453519]
|
|
|
|
mean value: 0.016861367225646972
|
|
|
|
key: test_mcc
|
|
value: [0.84530217 0.93844649 0.87298387 0.90900317 0.96875 0.77800241
|
|
0.93832585 0.87988269 0.78094752 0.82408564]
|
|
|
|
mean value: 0.8735729827569131
|
|
|
|
key: train_mcc
|
|
value: [0.9685478 0.91345109 0.92051926 0.96168178 0.90638293 0.93765708
|
|
0.91681307 0.92053575 0.9131033 0.92662845]
|
|
|
|
mean value: 0.9285320512916778
|
|
|
|
key: test_accuracy
|
|
value: [0.92063492 0.96825397 0.93650794 0.95238095 0.98412698 0.88888889
|
|
0.96825397 0.93650794 0.88888889 0.9047619 ]
|
|
|
|
mean value: 0.9349206349206349
|
|
|
|
key: train_accuracy
|
|
value: [0.98412698 0.95590829 0.95943563 0.98059965 0.95238095 0.96825397
|
|
0.95767196 0.95943563 0.95590829 0.96296296]
|
|
|
|
mean value: 0.963668430335097
|
|
|
|
key: test_fscore
|
|
value: [0.92307692 0.96875 0.93548387 0.95384615 0.98412698 0.89230769
|
|
0.96969697 0.94117647 0.89552239 0.91428571]
|
|
|
|
mean value: 0.9378273166956116
|
|
|
|
key: train_fscore
|
|
value: [0.98434783 0.95726496 0.96068376 0.98093588 0.95384615 0.96896552
|
|
0.95876289 0.96054889 0.95697074 0.96360485]
|
|
|
|
mean value: 0.9645931454804549
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.93939394 0.93548387 0.91176471 0.96875 0.87878788
|
|
0.94117647 0.88888889 0.85714286 0.84210526]
|
|
|
|
mean value: 0.9045846815986259
|
|
|
|
key: train_precision
|
|
value: [0.97250859 0.93023256 0.93355482 0.96587031 0.9269103 0.94612795
|
|
0.93311037 0.93333333 0.93288591 0.94557823]
|
|
|
|
mean value: 0.9420112357338174
|
|
|
|
key: test_recall
|
|
value: [0.96774194 1. 0.93548387 1. 1. 0.90625
|
|
1. 1. 0.9375 1. ]
|
|
|
|
mean value: 0.9746975806451613
|
|
|
|
key: train_recall
|
|
value: [0.99647887 0.98591549 0.98943662 0.99647887 0.98239437 0.99293286
|
|
0.98586572 0.98939929 0.98233216 0.98233216]
|
|
|
|
mean value: 0.9883566416164834
|
|
|
|
key: test_roc_auc
|
|
value: [0.92137097 0.96875 0.93649194 0.953125 0.984375 0.88860887
|
|
0.96774194 0.93548387 0.88810484 0.90322581]
|
|
|
|
mean value: 0.9347278225806451
|
|
|
|
key: train_roc_auc
|
|
value: [0.98410516 0.95585527 0.95938262 0.98057159 0.95232793 0.96829742
|
|
0.95772159 0.95948838 0.95595481 0.96299706]
|
|
|
|
mean value: 0.9636701836460458
|
|
|
|
key: test_jcc
|
|
value: [0.85714286 0.93939394 0.87878788 0.91176471 0.96875 0.80555556
|
|
0.94117647 0.88888889 0.81081081 0.84210526]
|
|
|
|
mean value: 0.8844376370208413
|
|
|
|
key: train_jcc
|
|
value: [0.96917808 0.91803279 0.92434211 0.96258503 0.91176471 0.93979933
|
|
0.92079208 0.92409241 0.91749175 0.92976589]
|
|
|
|
mean value: 0.9317844169251209
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.46
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01578856 0.01173925 0.01149726 0.01086855 0.01088572 0.01156378
|
|
0.01091862 0.01160908 0.01126027 0.01059556]
|
|
|
|
mean value: 0.011672663688659667
|
|
|
|
key: score_time
|
|
value: [0.01489091 0.01034808 0.00981307 0.00902915 0.00896502 0.00910258
|
|
0.00916219 0.00911522 0.00939226 0.00944948]
|
|
|
|
mean value: 0.009926795959472656
|
|
|
|
key: test_mcc
|
|
value: [0.66853948 0.77211 0.64134943 0.79833297 0.84530217 0.62939541
|
|
0.72270545 0.74424094 0.75156646 0.62469891]
|
|
|
|
mean value: 0.7198241218034568
|
|
|
|
key: train_mcc
|
|
value: [0.74157536 0.74359996 0.74349147 0.74698014 0.74157536 0.74247033
|
|
0.74118023 0.73819414 0.74118023 0.7531774 ]
|
|
|
|
mean value: 0.7433424613755777
|
|
|
|
key: test_accuracy
|
|
value: [0.82539683 0.87301587 0.80952381 0.88888889 0.92063492 0.80952381
|
|
0.85714286 0.85714286 0.87301587 0.79365079]
|
|
|
|
mean value: 0.8507936507936508
|
|
|
|
key: train_accuracy
|
|
value: [0.86243386 0.86067019 0.86419753 0.86419753 0.86243386 0.86067019
|
|
0.86067019 0.85890653 0.86067019 0.86772487]
|
|
|
|
mean value: 0.8622574955908289
|
|
|
|
key: test_fscore
|
|
value: [0.84057971 0.88571429 0.82857143 0.89855072 0.92307692 0.82857143
|
|
0.86956522 0.87671233 0.88235294 0.82666667]
|
|
|
|
mean value: 0.8660361654718239
|
|
|
|
key: train_fscore
|
|
value: [0.87579618 0.87598116 0.8768 0.87797147 0.87579618 0.87519747
|
|
0.8748019 0.87341772 0.8748019 0.88038278]
|
|
|
|
mean value: 0.876094676471338
|
|
|
|
key: test_precision
|
|
value: [0.76315789 0.79487179 0.74358974 0.81578947 0.88235294 0.76315789
|
|
0.81081081 0.7804878 0.83333333 0.72093023]
|
|
|
|
mean value: 0.7908481924376236
|
|
|
|
key: train_precision
|
|
value: [0.7994186 0.79036827 0.80351906 0.79827089 0.7994186 0.79142857
|
|
0.79310345 0.79083095 0.79310345 0.80232558]
|
|
|
|
mean value: 0.7961787431146718
|
|
|
|
key: test_recall
|
|
value: [0.93548387 1. 0.93548387 1. 0.96774194 0.90625
|
|
0.9375 1. 0.9375 0.96875 ]
|
|
|
|
mean value: 0.9588709677419355
|
|
|
|
key: train_recall
|
|
value: [0.96830986 0.98239437 0.96478873 0.97535211 0.96830986 0.97879859
|
|
0.97526502 0.97526502 0.97526502 0.97526502]
|
|
|
|
mean value: 0.9739013586821281
|
|
|
|
key: test_roc_auc
|
|
value: [0.82711694 0.875 0.81149194 0.890625 0.92137097 0.80796371
|
|
0.85584677 0.85483871 0.87197581 0.79082661]
|
|
|
|
mean value: 0.8507056451612903
|
|
|
|
key: train_roc_auc
|
|
value: [0.8622468 0.86045513 0.86401981 0.86400114 0.8622468 0.86087817
|
|
0.86087195 0.85911138 0.86087195 0.8679142 ]
|
|
|
|
mean value: 0.8622617329418205
|
|
|
|
key: test_jcc
|
|
value: [0.725 0.79487179 0.70731707 0.81578947 0.85714286 0.70731707
|
|
0.76923077 0.7804878 0.78947368 0.70454545]
|
|
|
|
mean value: 0.7651175984905125
|
|
|
|
key: train_jcc
|
|
value: [0.77903683 0.77932961 0.78062678 0.78248588 0.77903683 0.77808989
|
|
0.77746479 0.7752809 0.77746479 0.78632479]
|
|
|
|
mean value: 0.7795141069968906
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01120138 0.01067066 0.0103941 0.01164055 0.01050735 0.01070833
|
|
0.01115823 0.01072073 0.01080966 0.0109427 ]
|
|
|
|
mean value: 0.010875368118286132
|
|
|
|
key: score_time
|
|
value: [0.00904727 0.00912952 0.00927854 0.0089941 0.00900531 0.0089252
|
|
0.00921464 0.00901341 0.00898576 0.0093987 ]
|
|
|
|
mean value: 0.009099245071411133
|
|
|
|
key: test_mcc
|
|
value: [0.64134943 0.72407013 0.60364273 0.75254943 0.88034084 0.71705182
|
|
0.71705182 0.68740835 0.71705182 0.56449867]
|
|
|
|
mean value: 0.7005015034599485
|
|
|
|
key: train_mcc
|
|
value: [0.72022433 0.71543362 0.7251175 0.7178705 0.70361491 0.72038095
|
|
0.71955095 0.71155171 0.72051044 0.73467563]
|
|
|
|
mean value: 0.7188930537628848
|
|
|
|
key: test_accuracy
|
|
value: [0.80952381 0.85714286 0.79365079 0.87301587 0.93650794 0.85714286
|
|
0.85714286 0.84126984 0.85714286 0.77777778]
|
|
|
|
mean value: 0.846031746031746
|
|
|
|
key: train_accuracy
|
|
value: [0.85537919 0.85361552 0.85714286 0.85537919 0.84832451 0.85537919
|
|
0.85537919 0.85361552 0.85714286 0.86243386]
|
|
|
|
mean value: 0.855379188712522
|
|
|
|
key: test_fscore
|
|
value: [0.82857143 0.86567164 0.8115942 0.87878788 0.93939394 0.86567164
|
|
0.86567164 0.85294118 0.86567164 0.8 ]
|
|
|
|
mean value: 0.8573975193286565
|
|
|
|
key: train_fscore
|
|
value: [0.86644951 0.86415712 0.86871961 0.86513158 0.85855263 0.86601307
|
|
0.86557377 0.86097152 0.86567164 0.87254902]
|
|
|
|
mean value: 0.8653789480498528
|
|
|
|
key: test_precision
|
|
value: [0.74358974 0.80555556 0.73684211 0.82857143 0.88571429 0.82857143
|
|
0.82857143 0.80555556 0.82857143 0.73684211]
|
|
|
|
mean value: 0.802838506522717
|
|
|
|
key: train_precision
|
|
value: [0.80606061 0.80733945 0.8048048 0.8117284 0.80555556 0.80547112
|
|
0.80733945 0.81847134 0.815625 0.81155015]
|
|
|
|
mean value: 0.8093945874740627
|
|
|
|
key: test_recall
|
|
value: [0.93548387 0.93548387 0.90322581 0.93548387 1. 0.90625
|
|
0.90625 0.90625 0.90625 0.875 ]
|
|
|
|
mean value: 0.9209677419354838
|
|
|
|
key: train_recall
|
|
value: [0.93661972 0.92957746 0.94366197 0.92605634 0.91901408 0.93639576
|
|
0.93286219 0.90812721 0.92226148 0.9434629 ]
|
|
|
|
mean value: 0.9298039118100832
|
|
|
|
key: test_roc_auc
|
|
value: [0.81149194 0.85836694 0.7953629 0.87399194 0.9375 0.85635081
|
|
0.85635081 0.84022177 0.85635081 0.77620968]
|
|
|
|
mean value: 0.8462197580645161
|
|
|
|
key: train_roc_auc
|
|
value: [0.85523565 0.85348131 0.85699 0.85525432 0.84819962 0.85552182
|
|
0.8555156 0.85371149 0.8572575 0.86257652]
|
|
|
|
mean value: 0.8553743841138705
|
|
|
|
key: test_jcc
|
|
value: [0.70731707 0.76315789 0.68292683 0.78378378 0.88571429 0.76315789
|
|
0.76315789 0.74358974 0.76315789 0.66666667]
|
|
|
|
mean value: 0.7522629961140873
|
|
|
|
key: train_jcc
|
|
value: [0.76436782 0.76080692 0.76790831 0.76231884 0.75216138 0.76368876
|
|
0.76300578 0.75588235 0.76315789 0.77391304]
|
|
|
|
mean value: 0.7627211098149084
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01026583 0.00973606 0.01091933 0.01110077 0.01095653 0.01100445
|
|
0.0109899 0.01106548 0.01055384 0.00988722]
|
|
|
|
mean value: 0.010647940635681152
|
|
|
|
key: score_time
|
|
value: [0.01302862 0.0183332 0.01312423 0.01305318 0.01321793 0.0131464
|
|
0.01319385 0.01362038 0.01306939 0.01342177]
|
|
|
|
mean value: 0.013720893859863281
|
|
|
|
key: test_mcc
|
|
value: [0.79833297 0.90900317 0.72098341 0.76058095 0.85238636 0.62469891
|
|
0.85168687 0.85168687 0.78719616 0.75156646]
|
|
|
|
mean value: 0.7908122136874359
|
|
|
|
key: train_mcc
|
|
value: [0.86310294 0.84137632 0.86120582 0.87150596 0.86051286 0.85807335
|
|
0.84778672 0.86445655 0.85489423 0.84624377]
|
|
|
|
mean value: 0.8569158510273078
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 0.95238095 0.84126984 0.87301587 0.92063492 0.79365079
|
|
0.92063492 0.92063492 0.88888889 0.87301587]
|
|
|
|
mean value: 0.8873015873015873
|
|
|
|
key: train_accuracy
|
|
value: [0.92945326 0.91710758 0.92768959 0.9329806 0.92768959 0.92592593
|
|
0.92063492 0.92945326 0.92416226 0.91887125]
|
|
|
|
mean value: 0.9253968253968253
|
|
|
|
key: test_fscore
|
|
value: [0.89855072 0.95384615 0.86111111 0.88235294 0.92537313 0.82666667
|
|
0.92753623 0.92753623 0.89855072 0.88235294]
|
|
|
|
mean value: 0.898387686134871
|
|
|
|
key: train_fscore
|
|
value: [0.93288591 0.92231405 0.93178037 0.93666667 0.93155259 0.93
|
|
0.92512479 0.93311037 0.92845258 0.92409241]
|
|
|
|
mean value: 0.9295979724178515
|
|
|
|
key: test_precision
|
|
value: [0.81578947 0.91176471 0.75609756 0.81081081 0.86111111 0.72093023
|
|
0.86486486 0.86486486 0.83783784 0.83333333]
|
|
|
|
mean value: 0.8277404795923136
|
|
|
|
key: train_precision
|
|
value: [0.89102564 0.86915888 0.88328076 0.88924051 0.88571429 0.88012618
|
|
0.87421384 0.88571429 0.87735849 0.86687307]
|
|
|
|
mean value: 0.8802705929410596
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.96774194 1. 0.96875
|
|
1. 1. 0.96875 0.9375 ]
|
|
|
|
mean value: 0.9842741935483871
|
|
|
|
key: train_recall
|
|
value: [0.97887324 0.98239437 0.98591549 0.98943662 0.98239437 0.98586572
|
|
0.98233216 0.98586572 0.98586572 0.98939929]
|
|
|
|
mean value: 0.984834270641517
|
|
|
|
key: test_roc_auc
|
|
value: [0.890625 0.953125 0.84375 0.87449597 0.921875 0.79082661
|
|
0.91935484 0.91935484 0.88760081 0.87197581]
|
|
|
|
mean value: 0.8872983870967742
|
|
|
|
key: train_roc_auc
|
|
value: [0.92936595 0.91699224 0.92758672 0.93288085 0.92759294 0.92603145
|
|
0.92074354 0.92955258 0.92427089 0.91899542]
|
|
|
|
mean value: 0.9254012591449758
|
|
|
|
key: test_jcc
|
|
value: [0.81578947 0.91176471 0.75609756 0.78947368 0.86111111 0.70454545
|
|
0.86486486 0.86486486 0.81578947 0.78947368]
|
|
|
|
mean value: 0.8173774878033732
|
|
|
|
key: train_jcc
|
|
value: [0.87421384 0.85582822 0.87227414 0.88087774 0.871875 0.86915888
|
|
0.86068111 0.87460815 0.86645963 0.85889571]
|
|
|
|
mean value: 0.8684872419962413
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02566004 0.02351904 0.02392268 0.02339339 0.02320099 0.02444601
|
|
0.02347302 0.024194 0.02488732 0.02433801]
|
|
|
|
mean value: 0.024103450775146484
|
|
|
|
key: score_time
|
|
value: [0.01250339 0.01303148 0.01228809 0.01211977 0.01276565 0.01305032
|
|
0.01209092 0.01211333 0.01220918 0.01217484]
|
|
|
|
mean value: 0.012434697151184082
|
|
|
|
key: test_mcc
|
|
value: [0.87487431 1. 0.78160117 0.90524194 0.96875 0.78160117
|
|
0.96871896 0.93832585 0.8415746 0.8415746 ]
|
|
|
|
mean value: 0.8902262589244235
|
|
|
|
key: train_mcc
|
|
value: [0.91901143 0.91554558 0.91892094 0.91914772 0.91209057 0.9367199
|
|
0.91196088 0.92260788 0.92597892 0.92966389]
|
|
|
|
mean value: 0.9211647709528938
|
|
|
|
key: test_accuracy
|
|
value: [0.93650794 1. 0.88888889 0.95238095 0.98412698 0.88888889
|
|
0.98412698 0.96825397 0.92063492 0.92063492]
|
|
|
|
mean value: 0.9444444444444444
|
|
|
|
key: train_accuracy
|
|
value: [0.95943563 0.95767196 0.95943563 0.95943563 0.95590829 0.96825397
|
|
0.95590829 0.96119929 0.96296296 0.96472663]
|
|
|
|
mean value: 0.9604938271604938
|
|
|
|
key: test_fscore
|
|
value: [0.9375 1. 0.89230769 0.95238095 0.98412698 0.8852459
|
|
0.98461538 0.96969697 0.92307692 0.92307692]
|
|
|
|
mean value: 0.9452027730921173
|
|
|
|
key: train_fscore
|
|
value: [0.95986038 0.95818815 0.95971979 0.96 0.95652174 0.96853147
|
|
0.95621716 0.96153846 0.96309315 0.96503497]
|
|
|
|
mean value: 0.960870527007407
|
|
|
|
key: test_precision
|
|
value: [0.90909091 1. 0.85294118 0.9375 0.96875 0.93103448
|
|
0.96969697 0.94117647 0.90909091 0.90909091]
|
|
|
|
mean value: 0.9328371826787141
|
|
|
|
key: train_precision
|
|
value: [0.95155709 0.94827586 0.95470383 0.94845361 0.94501718 0.95847751
|
|
0.94791667 0.95155709 0.95804196 0.9550173 ]
|
|
|
|
mean value: 0.9519018106448003
|
|
|
|
key: test_recall
|
|
value: [0.96774194 1. 0.93548387 0.96774194 1. 0.84375
|
|
1. 1. 0.9375 0.9375 ]
|
|
|
|
mean value: 0.9589717741935484
|
|
|
|
key: train_recall
|
|
value: [0.96830986 0.96830986 0.96478873 0.97183099 0.96830986 0.97879859
|
|
0.96466431 0.97173145 0.96819788 0.97526502]
|
|
|
|
mean value: 0.9700206539590902
|
|
|
|
key: test_roc_auc
|
|
value: [0.93699597 1. 0.88961694 0.95262097 0.984375 0.88961694
|
|
0.98387097 0.96774194 0.9203629 0.9203629 ]
|
|
|
|
mean value: 0.9445564516129032
|
|
|
|
key: train_roc_auc
|
|
value: [0.95941995 0.95765316 0.95942617 0.95941373 0.95588638 0.96827253
|
|
0.9559237 0.96121784 0.96297218 0.96474518]
|
|
|
|
mean value: 0.9604930821679192
|
|
|
|
key: test_jcc
|
|
value: [0.88235294 1. 0.80555556 0.90909091 0.96875 0.79411765
|
|
0.96969697 0.94117647 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8985026207452678
|
|
|
|
key: train_jcc
|
|
value: [0.92281879 0.91973244 0.92255892 0.92307692 0.91666667 0.93898305
|
|
0.91610738 0.92592593 0.92881356 0.93243243]
|
|
|
|
mean value: 0.9247116096798579
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.03710318 2.32267642 2.53339148 2.10705328 2.14102483 2.20159388
|
|
2.13356161 2.03490043 2.03021073 2.07961822]
|
|
|
|
mean value: 2.162113404273987
|
|
|
|
key: score_time
|
|
value: [0.01250243 0.01412129 0.01420093 0.01445484 0.02245712 0.01304412
|
|
0.01836061 0.02526736 0.01882219 0.02280641]
|
|
|
|
mean value: 0.017603731155395506
|
|
|
|
key: test_mcc
|
|
value: [0.87487431 1. 0.87462485 0.90524194 0.96875 0.71471774
|
|
0.93832585 0.90873893 0.90524194 0.84484323]
|
|
|
|
mean value: 0.8935358793154082
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 0.99647887 1. 0.99647887]
|
|
|
|
mean value: 0.9992957746478873
|
|
|
|
key: test_accuracy
|
|
value: [0.93650794 1. 0.93650794 0.95238095 0.98412698 0.85714286
|
|
0.96825397 0.95238095 0.95238095 0.92063492]
|
|
|
|
mean value: 0.946031746031746
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 0.99823633 1. 0.99823633]
|
|
|
|
mean value: 0.999647266313933
|
|
|
|
key: test_fscore
|
|
value: [0.9375 1. 0.93333333 0.95238095 0.98412698 0.85714286
|
|
0.96969697 0.95522388 0.95238095 0.92537313]
|
|
|
|
mean value: 0.9467159063987423
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 0.99823633 1. 0.99823633]
|
|
|
|
mean value: 0.999647266313933
|
|
|
|
key: test_precision
|
|
value: [0.90909091 1. 0.96551724 0.9375 0.96875 0.87096774
|
|
0.94117647 0.91428571 0.96774194 0.88571429]
|
|
|
|
mean value: 0.936074429847781
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 0.99647887 1. 0.99647887]
|
|
|
|
mean value: 0.9992957746478873
|
|
|
|
key: test_recall
|
|
value: [0.96774194 1. 0.90322581 0.96774194 1. 0.84375
|
|
1. 1. 0.9375 0.96875 ]
|
|
|
|
mean value: 0.9588709677419355
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.93699597 1. 0.9359879 0.95262097 0.984375 0.85735887
|
|
0.96774194 0.9516129 0.95262097 0.91985887]
|
|
|
|
mean value: 0.9459173387096774
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 0.99823944 1. 0.99823944]
|
|
|
|
mean value: 0.9996478873239437
|
|
|
|
key: test_jcc
|
|
value: [0.88235294 1. 0.875 0.90909091 0.96875 0.75
|
|
0.94117647 0.91428571 0.90909091 0.86111111]
|
|
|
|
mean value: 0.9010858055343349
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 0.99647887 1. 0.99647887]
|
|
|
|
mean value: 0.9992957746478873
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.43
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.06945992 0.05328107 0.04494476 0.04605842 0.05913305 0.0532124
|
|
0.0525403 0.05411696 0.04540658 0.04035687]
|
|
|
|
mean value: 0.05185103416442871
|
|
|
|
key: score_time
|
|
value: [0.00934982 0.00895405 0.00894928 0.00906849 0.00895882 0.00880551
|
|
0.00900245 0.00878859 0.00896502 0.00892091]
|
|
|
|
mean value: 0.008976292610168458
|
|
|
|
key: test_mcc
|
|
value: [0.58770161 0.62475802 0.74772995 0.71471774 0.84173387 0.71471774
|
|
0.8415746 0.82408564 0.84484323 0.74596774]
|
|
|
|
mean value: 0.7487830141976113
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.79365079 0.80952381 0.87301587 0.85714286 0.92063492 0.85714286
|
|
0.92063492 0.9047619 0.92063492 0.87301587]
|
|
|
|
mean value: 0.873015873015873
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.79365079 0.81818182 0.875 0.85714286 0.92063492 0.85714286
|
|
0.92307692 0.91428571 0.92537313 0.875 ]
|
|
|
|
mean value: 0.8759489018444242
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.78125 0.77142857 0.84848485 0.84375 0.90625 0.87096774
|
|
0.90909091 0.84210526 0.88571429 0.875 ]
|
|
|
|
mean value: 0.8534041619811993
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.80645161 0.87096774 0.90322581 0.87096774 0.93548387 0.84375
|
|
0.9375 1. 0.96875 0.875 ]
|
|
|
|
mean value: 0.9012096774193549
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.79385081 0.81048387 0.8734879 0.85735887 0.92086694 0.85735887
|
|
0.9203629 0.90322581 0.91985887 0.87298387]
|
|
|
|
mean value: 0.872983870967742
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.65789474 0.69230769 0.77777778 0.75 0.85294118 0.75
|
|
0.85714286 0.84210526 0.86111111 0.77777778]
|
|
|
|
mean value: 0.7819058392587804
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.44
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.12184596 0.12592673 0.12110901 0.12176442 0.12262321 0.12329173
|
|
0.12291861 0.12309837 0.1223855 0.12486649]
|
|
|
|
mean value: 0.12298300266265869
|
|
|
|
key: score_time
|
|
value: [0.01783848 0.01782966 0.01787925 0.01792359 0.01792836 0.01784134
|
|
0.01788831 0.017869 0.01796222 0.01787543]
|
|
|
|
mean value: 0.017883563041687013
|
|
|
|
key: test_mcc
|
|
value: [0.93844649 0.96875 0.90514678 0.93649194 1. 0.81130213
|
|
0.96871896 0.93832585 0.93649194 0.90514678]
|
|
|
|
mean value: 0.9308820859961903
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96825397 0.98412698 0.95238095 0.96825397 1. 0.9047619
|
|
0.98412698 0.96825397 0.96825397 0.95238095]
|
|
|
|
mean value: 0.9650793650793651
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96875 0.98412698 0.95081967 0.96774194 1. 0.90322581
|
|
0.98461538 0.96969697 0.96875 0.95384615]
|
|
|
|
mean value: 0.9651572906352124
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.93939394 0.96875 0.96666667 0.96774194 1. 0.93333333
|
|
0.96969697 0.94117647 0.96875 0.93939394]
|
|
|
|
mean value: 0.9594903254556955
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.93548387 0.96774194 1. 0.875
|
|
1. 1. 0.96875 0.96875 ]
|
|
|
|
mean value: 0.9715725806451613
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96875 0.984375 0.95211694 0.96824597 1. 0.90524194
|
|
0.98387097 0.96774194 0.96824597 0.95211694]
|
|
|
|
mean value: 0.965070564516129
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.93939394 0.96875 0.90625 0.9375 1. 0.82352941
|
|
0.96969697 0.94117647 0.93939394 0.91176471]
|
|
|
|
mean value: 0.9337455436720142
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.41
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01043081 0.01031065 0.0102365 0.01026797 0.01031113 0.01034284
|
|
0.01207876 0.01058125 0.0105741 0.01048827]
|
|
|
|
mean value: 0.01056222915649414
|
|
|
|
key: score_time
|
|
value: [0.00874019 0.00868964 0.00870824 0.00867963 0.00870752 0.00874615
|
|
0.00962138 0.00881481 0.00888348 0.0088563 ]
|
|
|
|
mean value: 0.008844733238220215
|
|
|
|
key: test_mcc
|
|
value: [0.82507166 0.84173387 0.63159952 0.77822581 0.87487431 0.55611985
|
|
0.77822581 0.81092385 0.61895161 0.55611985]
|
|
|
|
mean value: 0.7271846136943092
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9047619 0.92063492 0.80952381 0.88888889 0.93650794 0.77777778
|
|
0.88888889 0.9047619 0.80952381 0.77777778]
|
|
|
|
mean value: 0.861904761904762
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.91176471 0.92063492 0.82352941 0.88888889 0.9375 0.78787879
|
|
0.88888889 0.90909091 0.8125 0.78787879]
|
|
|
|
mean value: 0.8668555300908242
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83783784 0.90625 0.75675676 0.875 0.90909091 0.76470588
|
|
0.90322581 0.88235294 0.8125 0.76470588]
|
|
|
|
mean value: 0.841242601601947
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.93548387 0.90322581 0.90322581 0.96774194 0.8125
|
|
0.875 0.9375 0.8125 0.8125 ]
|
|
|
|
mean value: 0.8959677419354839
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.90625 0.92086694 0.8109879 0.8891129 0.93699597 0.77721774
|
|
0.8891129 0.90423387 0.80947581 0.77721774]
|
|
|
|
mean value: 0.8621471774193549
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83783784 0.85294118 0.7 0.8 0.88235294 0.65
|
|
0.8 0.83333333 0.68421053 0.65 ]
|
|
|
|
mean value: 0.7690675815134019
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.42
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.35388255 2.40788078 2.4095149 2.36452222 2.32650852 2.29342246
|
|
2.33249092 2.32576632 2.30348825 2.32206368]
|
|
|
|
mean value: 2.343954062461853
|
|
|
|
key: score_time
|
|
value: [0.15916395 0.09987593 0.10035968 0.09533596 0.0920496 0.09907556
|
|
0.10007405 0.0986793 0.09491301 0.09696269]
|
|
|
|
mean value: 0.1036489725112915
|
|
|
|
key: test_mcc
|
|
value: [0.90900317 0.96875 0.93832585 0.87298387 1. 0.78160117
|
|
0.93649194 0.93832585 0.90524194 0.84173387]
|
|
|
|
mean value: 0.9092457660661265
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.95238095 0.98412698 0.96825397 0.93650794 1. 0.88888889
|
|
0.96825397 0.96825397 0.95238095 0.92063492]
|
|
|
|
mean value: 0.9539682539682539
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95384615 0.98412698 0.96666667 0.93548387 1. 0.8852459
|
|
0.96875 0.96969697 0.95238095 0.92063492]
|
|
|
|
mean value: 0.9536832419959733
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.91176471 0.96875 1. 0.93548387 1. 0.93103448
|
|
0.96875 0.94117647 0.96774194 0.93548387]
|
|
|
|
mean value: 0.9560185336648563
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.93548387 0.93548387 1. 0.84375
|
|
0.96875 1. 0.9375 0.90625 ]
|
|
|
|
mean value: 0.9527217741935484
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.953125 0.984375 0.96774194 0.93649194 1. 0.88961694
|
|
0.96824597 0.96774194 0.95262097 0.92086694]
|
|
|
|
mean value: 0.9540826612903226
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.91176471 0.96875 0.93548387 0.87878788 1. 0.79411765
|
|
0.93939394 0.94117647 0.90909091 0.85294118]
|
|
|
|
mean value: 0.9131506598240469
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.4
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.98013568 1.10599613 1.0370276 0.99879122 1.01508045 1.01929688
|
|
1.01361489 1.04204082 1.001688 1.05617952]
|
|
|
|
mean value: 1.0269851207733154
|
|
|
|
key: score_time
|
|
value: [0.2499814 0.27505255 0.27093649 0.20159578 0.23592544 0.16969681
|
|
0.21626568 0.26633334 0.28520155 0.25466394]
|
|
|
|
mean value: 0.24256529808044433
|
|
|
|
key: test_mcc
|
|
value: [0.84530217 0.93844649 0.90524194 0.8415746 0.96875 0.77822581
|
|
0.90524194 0.93832585 0.90524194 0.81130213]
|
|
|
|
mean value: 0.8837652853158429
|
|
|
|
key: train_mcc
|
|
value: [0.9541507 0.95061713 0.95064017 0.95415013 0.95064017 0.96475097
|
|
0.94358662 0.9541507 0.9506414 0.95776821]
|
|
|
|
mean value: 0.9531096201713586
|
|
|
|
key: test_accuracy
|
|
value: [0.92063492 0.96825397 0.95238095 0.92063492 0.98412698 0.88888889
|
|
0.95238095 0.96825397 0.95238095 0.9047619 ]
|
|
|
|
mean value: 0.9412698412698413
|
|
|
|
key: train_accuracy
|
|
value: [0.97707231 0.97530864 0.97530864 0.97707231 0.97530864 0.98236332
|
|
0.97178131 0.97707231 0.97530864 0.97883598]
|
|
|
|
mean value: 0.9765432098765432
|
|
|
|
key: test_fscore
|
|
value: [0.92307692 0.96875 0.95238095 0.91803279 0.98412698 0.88888889
|
|
0.95238095 0.96969697 0.95238095 0.90322581]
|
|
|
|
mean value: 0.9412941216269481
|
|
|
|
key: train_fscore
|
|
value: [0.97707231 0.97535211 0.9754386 0.9771529 0.9754386 0.98239437
|
|
0.97183099 0.97707231 0.97535211 0.97894737]
|
|
|
|
mean value: 0.9766051659503838
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.93939394 0.9375 0.93333333 0.96875 0.90322581
|
|
0.96774194 0.94117647 0.96774194 0.93333333]
|
|
|
|
mean value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
0.9374549695244667
|
|
|
|
key: train_precision
|
|
value: [0.97879859 0.97535211 0.97202797 0.9754386 0.97202797 0.97894737
|
|
0.96842105 0.97535211 0.97192982 0.97212544]
|
|
|
|
mean value: 0.9740421033625828
|
|
|
|
key: test_recall
|
|
value: [0.96774194 1. 0.96774194 0.90322581 1. 0.875
|
|
0.9375 1. 0.9375 0.875 ]
|
|
|
|
mean value: 0.9463709677419355
|
|
|
|
key: train_recall
|
|
value: [0.97535211 0.97535211 0.97887324 0.97887324 0.97887324 0.98586572
|
|
0.97526502 0.97879859 0.97879859 0.98586572]
|
|
|
|
mean value: 0.9791917583237943
|
|
|
|
key: test_roc_auc
|
|
value: [0.92137097 0.96875 0.95262097 0.9203629 0.984375 0.8891129
|
|
0.95262097 0.96774194 0.95262097 0.90524194]
|
|
|
|
mean value: 0.9414818548387097
|
|
|
|
key: train_roc_auc
|
|
value: [0.97707535 0.97530857 0.97530234 0.97706913 0.97530234 0.98236948
|
|
0.97178744 0.97707535 0.97531479 0.97884836]
|
|
|
|
mean value: 0.9765453142885583
|
|
|
|
key: test_jcc
|
|
value: [0.85714286 0.93939394 0.90909091 0.84848485 0.96875 0.8
|
|
0.90909091 0.94117647 0.90909091 0.82352941]
|
|
|
|
mean value: 0.8905750254647313
|
|
|
|
key: train_jcc
|
|
value: [0.95517241 0.95189003 0.95205479 0.95532646 0.95205479 0.96539792
|
|
0.94520548 0.95517241 0.95189003 0.95876289]
|
|
|
|
mean value: 0.9542927235762351
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.42
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02530265 0.01045108 0.01047325 0.01049042 0.01063466 0.01044035
|
|
0.01038694 0.01152253 0.01164103 0.01163292]
|
|
|
|
mean value: 0.012297582626342774
|
|
|
|
key: score_time
|
|
value: [0.01132822 0.00885463 0.00890088 0.00890636 0.00894165 0.00954676
|
|
0.00943446 0.00970578 0.00968838 0.00976562]
|
|
|
|
mean value: 0.009507274627685547
|
|
|
|
key: test_mcc
|
|
value: [0.64134943 0.72407013 0.60364273 0.75254943 0.88034084 0.71705182
|
|
0.71705182 0.68740835 0.71705182 0.56449867]
|
|
|
|
mean value: 0.7005015034599485
|
|
|
|
key: train_mcc
|
|
value: [0.72022433 0.71543362 0.7251175 0.7178705 0.70361491 0.72038095
|
|
0.71955095 0.71155171 0.72051044 0.73467563]
|
|
|
|
mean value: 0.7188930537628848
|
|
|
|
key: test_accuracy
|
|
value: [0.80952381 0.85714286 0.79365079 0.87301587 0.93650794 0.85714286
|
|
0.85714286 0.84126984 0.85714286 0.77777778]
|
|
|
|
mean value: 0.846031746031746
|
|
|
|
key: train_accuracy
|
|
value: [0.85537919 0.85361552 0.85714286 0.85537919 0.84832451 0.85537919
|
|
0.85537919 0.85361552 0.85714286 0.86243386]
|
|
|
|
mean value: 0.855379188712522
|
|
|
|
key: test_fscore
|
|
value: [0.82857143 0.86567164 0.8115942 0.87878788 0.93939394 0.86567164
|
|
0.86567164 0.85294118 0.86567164 0.8 ]
|
|
|
|
mean value: 0.8573975193286565
|
|
|
|
key: train_fscore
|
|
value: [0.86644951 0.86415712 0.86871961 0.86513158 0.85855263 0.86601307
|
|
0.86557377 0.86097152 0.86567164 0.87254902]
|
|
|
|
mean value: 0.8653789480498528
|
|
|
|
key: test_precision
|
|
value: [0.74358974 0.80555556 0.73684211 0.82857143 0.88571429 0.82857143
|
|
0.82857143 0.80555556 0.82857143 0.73684211]
|
|
|
|
mean value: 0.802838506522717
|
|
|
|
key: train_precision
|
|
value: [0.80606061 0.80733945 0.8048048 0.8117284 0.80555556 0.80547112
|
|
0.80733945 0.81847134 0.815625 0.81155015]
|
|
|
|
mean value: 0.8093945874740627
|
|
|
|
key: test_recall
|
|
value: [0.93548387 0.93548387 0.90322581 0.93548387 1. 0.90625
|
|
0.90625 0.90625 0.90625 0.875 ]
|
|
|
|
mean value: 0.9209677419354838
|
|
|
|
key: train_recall
|
|
value: [0.93661972 0.92957746 0.94366197 0.92605634 0.91901408 0.93639576
|
|
0.93286219 0.90812721 0.92226148 0.9434629 ]
|
|
|
|
mean value: 0.9298039118100832
|
|
|
|
key: test_roc_auc
|
|
value: [0.81149194 0.85836694 0.7953629 0.87399194 0.9375 0.85635081
|
|
0.85635081 0.84022177 0.85635081 0.77620968]
|
|
|
|
mean value: 0.8462197580645161
|
|
|
|
key: train_roc_auc
|
|
value: [0.85523565 0.85348131 0.85699 0.85525432 0.84819962 0.85552182
|
|
0.8555156 0.85371149 0.8572575 0.86257652]
|
|
|
|
mean value: 0.8553743841138705
|
|
|
|
key: test_jcc
|
|
value: [0.70731707 0.76315789 0.68292683 0.78378378 0.88571429 0.76315789
|
|
0.76315789 0.74358974 0.76315789 0.66666667]
|
|
|
|
mean value: 0.7522629961140873
|
|
|
|
key: train_jcc
|
|
value: [0.76436782 0.76080692 0.76790831 0.76231884 0.75216138 0.76368876
|
|
0.76300578 0.75588235 0.76315789 0.77391304]
|
|
|
|
mean value: 0.7627211098149084
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.13090587 0.10350132 0.10109782 0.10042334 0.10605383 0.10390592
|
|
0.10422969 0.10304856 0.10623789 0.10313129]
|
|
|
|
mean value: 0.10625355243682862
|
|
|
|
key: score_time
|
|
value: [0.01223612 0.01163149 0.01133299 0.01166677 0.01103902 0.01146054
|
|
0.01168728 0.01112723 0.01143074 0.01105571]
|
|
|
|
mean value: 0.011466789245605468
|
|
|
|
key: test_mcc
|
|
value: [0.81130213 0.96875 0.93832585 0.87298387 0.96875 0.81130213
|
|
0.90514678 0.93832585 0.96871896 0.87487431]
|
|
|
|
mean value: 0.9058479882892593
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9047619 0.98412698 0.96825397 0.93650794 0.98412698 0.9047619
|
|
0.95238095 0.96825397 0.98412698 0.93650794]
|
|
|
|
mean value: 0.9523809523809523
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90625 0.98412698 0.96666667 0.93548387 0.98412698 0.90322581
|
|
0.95384615 0.96969697 0.98461538 0.93548387]
|
|
|
|
mean value: 0.952352269146624
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.87878788 0.96875 1. 0.93548387 0.96875 0.93333333
|
|
0.93939394 0.94117647 0.96969697 0.96666667]
|
|
|
|
mean value: 0.9502039129434765
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.93548387 1. 0.93548387 0.93548387 1. 0.875
|
|
0.96875 1. 1. 0.90625 ]
|
|
|
|
mean value: 0.9556451612903225
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.90524194 0.984375 0.96774194 0.93649194 0.984375 0.90524194
|
|
0.95211694 0.96774194 0.98387097 0.93699597]
|
|
|
|
mean value: 0.9524193548387097
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.82857143 0.96875 0.93548387 0.87878788 0.96875 0.82352941
|
|
0.91176471 0.94117647 0.96969697 0.87878788]
|
|
|
|
mean value: 0.9105298615047192
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.39
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04917097 0.06506038 0.09447956 0.07266498 0.11047935 0.05619383
|
|
0.04372478 0.08268929 0.05707788 0.0449338 ]
|
|
|
|
mean value: 0.06764748096466064
|
|
|
|
key: score_time
|
|
value: [0.01903558 0.01216626 0.01233268 0.01224375 0.01900625 0.01239228
|
|
0.01228929 0.0193572 0.01213884 0.01548886]
|
|
|
|
mean value: 0.014645099639892578
|
|
|
|
key: test_mcc
|
|
value: [0.74634526 0.88034084 0.81644514 0.82507166 0.90900317 0.68352185
|
|
0.87988269 0.85168687 0.78719616 0.70447456]
|
|
|
|
mean value: 0.8083968208368107
|
|
|
|
key: train_mcc
|
|
value: [0.93764641 0.92093247 0.9272178 0.92051926 0.92051926 0.93800882
|
|
0.93765708 0.93800882 0.92723116 0.93466907]
|
|
|
|
mean value: 0.9302410165823197
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.93650794 0.9047619 0.9047619 0.95238095 0.84126984
|
|
0.93650794 0.92063492 0.88888889 0.84126984]
|
|
|
|
mean value: 0.8984126984126984
|
|
|
|
key: train_accuracy
|
|
value: [0.96825397 0.95943563 0.96296296 0.95943563 0.95943563 0.96825397
|
|
0.96825397 0.96825397 0.96296296 0.9664903 ]
|
|
|
|
mean value: 0.964373897707231
|
|
|
|
key: test_fscore
|
|
value: [0.87323944 0.93939394 0.90909091 0.91176471 0.95384615 0.84848485
|
|
0.94117647 0.92753623 0.89855072 0.86111111]
|
|
|
|
mean value: 0.9064194531539008
|
|
|
|
key: train_fscore
|
|
value: [0.96907216 0.96081772 0.96397942 0.96068376 0.96068376 0.96907216
|
|
0.96896552 0.96907216 0.96385542 0.96740995]
|
|
|
|
mean value: 0.9653612037698771
|
|
|
|
key: test_precision
|
|
value: [0.775 0.88571429 0.85714286 0.83783784 0.91176471 0.82352941
|
|
0.88888889 0.86486486 0.83783784 0.775 ]
|
|
|
|
mean value: 0.8457580689933631
|
|
|
|
key: train_precision
|
|
value: [0.94630872 0.93069307 0.93979933 0.93355482 0.93355482 0.94314381
|
|
0.94612795 0.94314381 0.93959732 0.94 ]
|
|
|
|
mean value: 0.9395923646776567
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.96774194 1. 1. 0.875
|
|
1. 1. 0.96875 0.96875 ]
|
|
|
|
mean value: 0.9780241935483871
|
|
|
|
key: train_recall
|
|
value: [0.99295775 0.99295775 0.98943662 0.98943662 0.98943662 0.99646643
|
|
0.99293286 0.99646643 0.98939929 0.99646643]
|
|
|
|
mean value: 0.9925956800875927
|
|
|
|
key: test_roc_auc
|
|
value: [0.859375 0.9375 0.90574597 0.90625 0.953125 0.84072581
|
|
0.93548387 0.91935484 0.88760081 0.83921371]
|
|
|
|
mean value: 0.8984375
|
|
|
|
key: train_roc_auc
|
|
value: [0.96821032 0.9593764 0.96291619 0.95938262 0.95938262 0.96830364
|
|
0.96829742 0.96830364 0.96300951 0.96654307]
|
|
|
|
mean value: 0.964372542676554
|
|
|
|
key: test_jcc
|
|
value: [0.775 0.88571429 0.83333333 0.83783784 0.91176471 0.73684211
|
|
0.88888889 0.86486486 0.81578947 0.75609756]
|
|
|
|
mean value: 0.8306133056444541
|
|
|
|
key: train_jcc
|
|
value: [0.94 0.92459016 0.93046358 0.92434211 0.92434211 0.94
|
|
0.93979933 0.94 0.93023256 0.93687708]
|
|
|
|
mean value: 0.9330646916274856
|
|
|
|
MCC on Blind test: 0.22
|
|
|
|
Accuracy on Blind test: 0.49
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01433301 0.0104239 0.00994563 0.00998878 0.01010275 0.00995636
|
|
0.01008821 0.0101459 0.01015592 0.01132917]
|
|
|
|
mean value: 0.010646963119506836
|
|
|
|
key: score_time
|
|
value: [0.00958967 0.00911641 0.00868368 0.00877547 0.00914621 0.00870824
|
|
0.00867462 0.00939846 0.00946808 0.00950575]
|
|
|
|
mean value: 0.009106659889221191
|
|
|
|
key: test_mcc
|
|
value: [0.76058095 0.85238636 0.62861856 0.72098341 0.82507166 0.68740835
|
|
0.73163649 0.77042092 0.58371723 0.50663549]
|
|
|
|
mean value: 0.7067459406681817
|
|
|
|
key: train_mcc
|
|
value: [0.72224441 0.72310788 0.74524776 0.73632323 0.73073344 0.74380641
|
|
0.72369241 0.72178687 0.73950079 0.67831566]
|
|
|
|
mean value: 0.726475885490787
|
|
|
|
key: test_accuracy
|
|
value: [0.87301587 0.92063492 0.79365079 0.84126984 0.9047619 0.84126984
|
|
0.85714286 0.87301587 0.77777778 0.74603175]
|
|
|
|
mean value: 0.8428571428571429
|
|
|
|
key: train_accuracy
|
|
value: [0.85185185 0.85008818 0.86243386 0.85714286 0.85537919 0.86067019
|
|
0.85185185 0.84832451 0.85890653 0.83421517]
|
|
|
|
mean value: 0.8530864197530864
|
|
|
|
key: test_fscore
|
|
value: [0.88235294 0.92537313 0.82191781 0.86111111 0.91176471 0.85294118
|
|
0.87323944 0.88888889 0.81081081 0.77777778]
|
|
|
|
mean value: 0.8606177791285254
|
|
|
|
key: train_fscore
|
|
value: [0.86708861 0.86697966 0.87697161 0.87284144 0.87066246 0.87559055
|
|
0.86708861 0.865625 0.87381703 0.84690554]
|
|
|
|
mean value: 0.8683570507913272
|
|
|
|
key: test_precision
|
|
value: [0.81081081 0.86111111 0.71428571 0.75609756 0.83783784 0.80555556
|
|
0.79487179 0.8 0.71428571 0.7 ]
|
|
|
|
mean value: 0.7794856099734149
|
|
|
|
key: train_precision
|
|
value: [0.78735632 0.78028169 0.79428571 0.78753541 0.78857143 0.78977273
|
|
0.78510029 0.77591036 0.78917379 0.78549849]
|
|
|
|
mean value: 0.7863486222153049
|
|
|
|
key: test_recall
|
|
value: [0.96774194 1. 0.96774194 1. 1. 0.90625
|
|
0.96875 1. 0.9375 0.875 ]
|
|
|
|
mean value: 0.9622983870967742
|
|
|
|
key: train_recall
|
|
value: [0.96478873 0.97535211 0.97887324 0.97887324 0.97183099 0.98233216
|
|
0.96819788 0.97879859 0.97879859 0.91872792]
|
|
|
|
mean value: 0.9696573433534067
|
|
|
|
key: test_roc_auc
|
|
value: [0.87449597 0.921875 0.79637097 0.84375 0.90625 0.84022177
|
|
0.85534274 0.87096774 0.77520161 0.74395161]
|
|
|
|
mean value: 0.8428427419354838
|
|
|
|
key: train_roc_auc
|
|
value: [0.85165232 0.84986687 0.86222814 0.85692779 0.85517344 0.86088439
|
|
0.85205669 0.84855422 0.8591176 0.83436396]
|
|
|
|
mean value: 0.8530825411834968
|
|
|
|
key: test_jcc
|
|
value: [0.78947368 0.86111111 0.69767442 0.75609756 0.83783784 0.74358974
|
|
0.775 0.8 0.68181818 0.63636364]
|
|
|
|
mean value: 0.7578966174511298
|
|
|
|
key: train_jcc
|
|
value: [0.76536313 0.76519337 0.78089888 0.77437326 0.77094972 0.77871148
|
|
0.76536313 0.7630854 0.77591036 0.73446328]
|
|
|
|
mean value: 0.7674312008301486
|
|
|
|
MCC on Blind test: 0.33
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01895046 0.01879478 0.01897502 0.02728033 0.02328038 0.0214169
|
|
0.02027035 0.02063298 0.01998615 0.02093506]
|
|
|
|
mean value: 0.021052241325378418
|
|
|
|
key: score_time
|
|
value: [0.00987387 0.01109147 0.01173568 0.01172709 0.01172948 0.01174998
|
|
0.01168871 0.01170945 0.01169086 0.01167035]
|
|
|
|
mean value: 0.011466693878173829
|
|
|
|
key: test_mcc
|
|
value: [0.65821474 1. 0.84530217 0.85238636 0.96875 0.71705182
|
|
0.90873893 0.87988269 0.78094752 0.82408564]
|
|
|
|
mean value: 0.8435359868061252
|
|
|
|
key: train_mcc
|
|
value: [0.86766083 0.87303878 0.85730272 0.88644555 0.91276949 0.93133966
|
|
0.88575247 0.91901646 0.90974781 0.92323145]
|
|
|
|
mean value: 0.896630522099505
|
|
|
|
key: test_accuracy
|
|
value: [0.82539683 1. 0.92063492 0.92063492 0.98412698 0.85714286
|
|
0.95238095 0.93650794 0.88888889 0.9047619 ]
|
|
|
|
mean value: 0.919047619047619
|
|
|
|
key: train_accuracy
|
|
value: [0.93121693 0.93650794 0.92416226 0.94003527 0.95590829 0.96472663
|
|
0.94003527 0.95943563 0.95414462 0.96119929]
|
|
|
|
mean value: 0.9467372134038801
|
|
|
|
key: test_fscore
|
|
value: [0.80701754 1. 0.92307692 0.92537313 0.98412698 0.86567164
|
|
0.95522388 0.94117647 0.89552239 0.91428571]
|
|
|
|
mean value: 0.9211474680713625
|
|
|
|
key: train_fscore
|
|
value: [0.9273743 0.93639576 0.92939245 0.94352159 0.95697074 0.96575342
|
|
0.94314381 0.95971979 0.95532646 0.96193772]
|
|
|
|
mean value: 0.9479536046767796
|
|
|
|
key: test_precision
|
|
value: [0.88461538 1. 0.88235294 0.86111111 0.96875 0.82857143
|
|
0.91428571 0.88888889 0.85714286 0.84210526]
|
|
|
|
mean value: 0.892782358894975
|
|
|
|
key: train_precision
|
|
value: [0.98418972 0.93971631 0.87076923 0.89308176 0.93602694 0.93687708
|
|
0.8952381 0.95138889 0.92976589 0.94237288]
|
|
|
|
mean value: 0.9279426791361853
|
|
|
|
key: test_recall
|
|
value: [0.74193548 1. 0.96774194 1. 1. 0.90625
|
|
1. 1. 0.9375 1. ]
|
|
|
|
mean value: 0.9553427419354839
|
|
|
|
key: train_recall
|
|
value: [0.87676056 0.93309859 0.99647887 1. 0.97887324 0.99646643
|
|
0.99646643 0.96819788 0.98233216 0.98233216]
|
|
|
|
mean value: 0.9711006320609168
|
|
|
|
key: test_roc_auc
|
|
value: [0.82409274 1. 0.92137097 0.921875 0.984375 0.85635081
|
|
0.9516129 0.93548387 0.88810484 0.90322581]
|
|
|
|
mean value: 0.918649193548387
|
|
|
|
key: train_roc_auc
|
|
value: [0.93131314 0.93651396 0.92403449 0.93992933 0.95586772 0.96478251
|
|
0.94013462 0.95945105 0.95419425 0.9612365 ]
|
|
|
|
mean value: 0.9467457572288857
|
|
|
|
key: test_jcc
|
|
value: [0.67647059 1. 0.85714286 0.86111111 0.96875 0.76315789
|
|
0.91428571 0.88888889 0.81081081 0.84210526]
|
|
|
|
mean value: 0.8582723128369413
|
|
|
|
key: train_jcc
|
|
value: [0.86458333 0.88039867 0.86809816 0.89308176 0.91749175 0.93377483
|
|
0.89240506 0.92255892 0.91447368 0.92666667]
|
|
|
|
mean value: 0.9013532845284429
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.37
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02053857 0.02511358 0.02637434 0.02378893 0.01977444 0.01945734
|
|
0.0223918 0.02318025 0.02271199 0.01939034]
|
|
|
|
mean value: 0.022272157669067382
|
|
|
|
key: score_time
|
|
value: [0.02004814 0.01181602 0.01178932 0.011796 0.01173639 0.01174283
|
|
0.01172662 0.01842642 0.01182628 0.01177096]
|
|
|
|
mean value: 0.013267898559570312
|
|
|
|
key: test_mcc
|
|
value: [0.74722285 0.74424094 0.78719616 0.90900317 0.96875 0.68865372
|
|
0.90873893 0.87988269 0.78719616 0.74596774]
|
|
|
|
mean value: 0.8166852372205127
|
|
|
|
key: train_mcc
|
|
value: [0.91933887 0.53873561 0.8860463 0.94393557 0.90521259 0.89189154
|
|
0.90123272 0.92639673 0.89033689 0.85870259]
|
|
|
|
mean value: 0.8661829411757017
|
|
|
|
key: test_accuracy
|
|
value: [0.87301587 0.85714286 0.88888889 0.95238095 0.98412698 0.84126984
|
|
0.95238095 0.93650794 0.88888889 0.87301587]
|
|
|
|
mean value: 0.9047619047619048
|
|
|
|
key: train_accuracy
|
|
value: [0.95943563 0.73015873 0.94179894 0.97178131 0.95238095 0.94532628
|
|
0.94885362 0.96296296 0.94356261 0.92768959]
|
|
|
|
mean value: 0.928395061728395
|
|
|
|
key: test_fscore
|
|
value: [0.86666667 0.83018868 0.87719298 0.95384615 0.98412698 0.83333333
|
|
0.95522388 0.94117647 0.89855072 0.875 ]
|
|
|
|
mean value: 0.9015305875497492
|
|
|
|
key: train_fscore
|
|
value: [0.9588551 0.63657957 0.93967093 0.97222222 0.95320624 0.94373866
|
|
0.95093063 0.96347826 0.94576271 0.92421442]
|
|
|
|
mean value: 0.9188658738108904
|
|
|
|
key: test_precision
|
|
value: [0.89655172 1. 0.96153846 0.91176471 0.96875 0.89285714
|
|
0.91428571 0.88888889 0.83783784 0.875 ]
|
|
|
|
mean value: 0.914747447542833
|
|
|
|
key: train_precision
|
|
value: [0.97454545 0.97810219 0.97718631 0.95890411 0.93856655 0.97014925
|
|
0.91233766 0.94863014 0.90879479 0.96899225]
|
|
|
|
mean value: 0.9536208707994551
|
|
|
|
key: test_recall
|
|
value: [0.83870968 0.70967742 0.80645161 1. 1. 0.78125
|
|
1. 1. 0.96875 0.875 ]
|
|
|
|
mean value: 0.8979838709677419
|
|
|
|
key: train_recall
|
|
value: [0.94366197 0.47183099 0.90492958 0.98591549 0.96830986 0.91872792
|
|
0.99293286 0.97879859 0.98586572 0.88339223]
|
|
|
|
mean value: 0.9034365201811576
|
|
|
|
key: test_roc_auc
|
|
value: [0.87247984 0.85483871 0.88760081 0.953125 0.984375 0.8422379
|
|
0.9516129 0.93548387 0.88760081 0.87298387]
|
|
|
|
mean value: 0.904233870967742
|
|
|
|
key: train_roc_auc
|
|
value: [0.95946349 0.73061514 0.94186408 0.97175633 0.95235281 0.94527945
|
|
0.94893122 0.96299084 0.94363709 0.92761161]
|
|
|
|
mean value: 0.9284502065395909
|
|
|
|
key: test_jcc
|
|
value: [0.76470588 0.70967742 0.78125 0.91176471 0.96875 0.71428571
|
|
0.91428571 0.88888889 0.81578947 0.77777778]
|
|
|
|
mean value: 0.8247175576512439
|
|
|
|
key: train_jcc
|
|
value: [0.9209622 0.46689895 0.8862069 0.94594595 0.91059603 0.89347079
|
|
0.90645161 0.9295302 0.89710611 0.85910653]
|
|
|
|
mean value: 0.8616275266162179
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.28951597 0.27887893 0.27886248 0.27727175 0.27663636 0.27909327
|
|
0.2795558 0.27781129 0.27442789 0.270612 ]
|
|
|
|
mean value: 0.27826657295227053
|
|
|
|
key: score_time
|
|
value: [0.01680303 0.01672268 0.01689577 0.01682281 0.01647258 0.01687884
|
|
0.01667738 0.01691699 0.01552916 0.01534081]
|
|
|
|
mean value: 0.016506004333496093
|
|
|
|
key: test_mcc
|
|
value: [0.75254943 0.93844649 0.90524194 0.87487431 0.96875 0.81644514
|
|
0.90873893 0.90514678 0.87462485 0.77800241]
|
|
|
|
mean value: 0.8722820284424901
|
|
|
|
key: train_mcc
|
|
value: [0.9929453 0.99296993 0.99647883 1. 0.99647887 1.
|
|
0.98942418 0.98942418 1. 1. ]
|
|
|
|
mean value: 0.9957721289135614
|
|
|
|
key: test_accuracy
|
|
value: [0.87301587 0.96825397 0.95238095 0.93650794 0.98412698 0.9047619
|
|
0.95238095 0.95238095 0.93650794 0.88888889]
|
|
|
|
mean value: 0.9349206349206349
|
|
|
|
key: train_accuracy
|
|
value: [0.99647266 0.99647266 0.99823633 1. 0.99823633 1.
|
|
0.99470899 0.99470899 1. 1. ]
|
|
|
|
mean value: 0.9978835978835978
|
|
|
|
key: test_fscore
|
|
value: [0.87878788 0.96875 0.95238095 0.9375 0.98412698 0.9
|
|
0.95522388 0.95384615 0.93939394 0.89230769]
|
|
|
|
mean value: 0.9362317481440615
|
|
|
|
key: train_fscore
|
|
value: [0.99647887 0.99649123 0.99824253 1. 0.99823633 1.
|
|
0.99470899 0.99470899 1. 1. ]
|
|
|
|
mean value: 0.9978866953052978
|
|
|
|
key: test_precision
|
|
value: [0.82857143 0.93939394 0.9375 0.90909091 0.96875 0.96428571
|
|
0.91428571 0.93939394 0.91176471 0.87878788]
|
|
|
|
mean value: 0.9191824229691877
|
|
|
|
key: train_precision
|
|
value: [0.99647887 0.99300699 0.99649123 1. 1. 1.
|
|
0.99295775 0.99295775 1. 1. ]
|
|
|
|
mean value: 0.9971892587274351
|
|
|
|
key: test_recall
|
|
value: [0.93548387 1. 0.96774194 0.96774194 1. 0.84375
|
|
1. 0.96875 0.96875 0.90625 ]
|
|
|
|
mean value: 0.9558467741935484
|
|
|
|
key: train_recall
|
|
value: [0.99647887 1. 1. 1. 0.99647887 1.
|
|
0.99646643 0.99646643 1. 1. ]
|
|
|
|
mean value: 0.9985890608669686
|
|
|
|
key: test_roc_auc
|
|
value: [0.87399194 0.96875 0.95262097 0.93699597 0.984375 0.90574597
|
|
0.9516129 0.95211694 0.9359879 0.88860887]
|
|
|
|
mean value: 0.9350806451612903
|
|
|
|
key: train_roc_auc
|
|
value: [0.99647265 0.99646643 0.99823322 1. 0.99823944 1.
|
|
0.99471209 0.99471209 1. 1. ]
|
|
|
|
mean value: 0.9978835913004529
|
|
|
|
key: test_jcc
|
|
value: [0.78378378 0.93939394 0.90909091 0.88235294 0.96875 0.81818182
|
|
0.91428571 0.91176471 0.88571429 0.80555556]
|
|
|
|
mean value: 0.8818873653064829
|
|
|
|
key: train_jcc
|
|
value: [0.99298246 0.99300699 0.99649123 1. 0.99647887 1.
|
|
0.98947368 0.98947368 1. 1. ]
|
|
|
|
mean value: 0.9957906918878009
|
|
|
|
MCC on Blind test: 0.23
|
|
|
|
Accuracy on Blind test: 0.45
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.11279559 0.14226246 0.14191484 0.13739991 0.11459565 0.13350439
|
|
0.12016177 0.1054635 0.12414002 0.1232295 ]
|
|
|
|
mean value: 0.12554676532745362
|
|
|
|
key: score_time
|
|
value: [0.04038763 0.03679395 0.03112054 0.02297783 0.03874803 0.03140926
|
|
0.02326155 0.03403425 0.0376792 0.03341651]
|
|
|
|
mean value: 0.03298287391662598
|
|
|
|
key: test_mcc
|
|
value: [0.78719616 0.87298387 0.87298387 0.77822581 0.96871896 0.72407013
|
|
0.87462485 1. 0.87298387 0.87298387]
|
|
|
|
mean value: 0.8624771394557307
|
|
|
|
key: train_mcc
|
|
value: [0.99647887 0.98942418 0.98947316 0.99647887 1. 0.99296993
|
|
0.9929453 0.99647887 0.99647883 0.98942418]
|
|
|
|
mean value: 0.9940152198663756
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 0.93650794 0.93650794 0.88888889 0.98412698 0.85714286
|
|
0.93650794 1. 0.93650794 0.93650794]
|
|
|
|
mean value: 0.9301587301587302
|
|
|
|
key: train_accuracy
|
|
value: [0.99823633 0.99470899 0.99470899 0.99823633 1. 0.99647266
|
|
0.99647266 0.99823633 0.99823633 0.99470899]
|
|
|
|
mean value: 0.9970017636684303
|
|
|
|
key: test_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[0.87719298 0.93548387 0.93548387 0.88888889 0.98360656 0.84745763
|
|
0.93939394 1. 0.9375 0.9375 ]
|
|
|
|
mean value: 0.9282507737170146
|
|
|
|
key: train_fscore
|
|
value: [0.99823633 0.99470899 0.99474606 0.99823633 1. 0.9964539
|
|
0.99646643 0.99823633 0.99823009 0.99470899]
|
|
|
|
mean value: 0.9970023463971844
|
|
|
|
key: test_precision
|
|
value: [0.96153846 0.93548387 0.93548387 0.875 1. 0.92592593
|
|
0.91176471 1. 0.9375 0.9375 ]
|
|
|
|
mean value: 0.9420196835282224
|
|
|
|
key: train_precision
|
|
value: [1. 0.99646643 0.98954704 1. 1. 1.
|
|
0.99646643 0.99647887 1. 0.99295775]
|
|
|
|
mean value: 0.9971916520236649
|
|
|
|
key: test_recall
|
|
value: [0.80645161 0.93548387 0.93548387 0.90322581 0.96774194 0.78125
|
|
0.96875 1. 0.9375 0.9375 ]
|
|
|
|
mean value: 0.9173387096774194
|
|
|
|
key: train_recall
|
|
value: [0.99647887 0.99295775 1. 0.99647887 1. 0.99293286
|
|
0.99646643 1. 0.99646643 0.99646643]
|
|
|
|
mean value: 0.9968247648434778
|
|
|
|
key: test_roc_auc
|
|
value: [0.88760081 0.93649194 0.93649194 0.8891129 0.98387097 0.85836694
|
|
0.9359879 1. 0.93649194 0.93649194]
|
|
|
|
mean value: 0.9300907258064516
|
|
|
|
key: train_roc_auc
|
|
value: [0.99823944 0.99471209 0.99469965 0.99823944 1. 0.99646643
|
|
0.99647265 0.99823944 0.99823322 0.99471209]
|
|
|
|
mean value: 0.9970014432887075
|
|
|
|
key: test_jcc
|
|
value: [0.78125 0.87878788 0.87878788 0.8 0.96774194 0.73529412
|
|
0.88571429 1. 0.88235294 0.88235294]
|
|
|
|
mean value: 0.8692281978773915
|
|
|
|
key: train_jcc
|
|
value: [0.99647887 0.98947368 0.98954704 0.99647887 1. 0.99293286
|
|
0.99295775 0.99647887 0.99646643 0.98947368]
|
|
|
|
mean value: 0.994028806623198
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.4
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.24788141 0.28544402 0.33319592 0.18345165 0.17821884 0.17617846
|
|
0.21043038 0.26548791 0.20081067 0.25987506]
|
|
|
|
mean value: 0.23409743309020997
|
|
|
|
key: score_time
|
|
value: [0.02718711 0.02728367 0.04041862 0.01620889 0.01610303 0.02732253
|
|
0.01601171 0.02752137 0.01609707 0.0161376 ]
|
|
|
|
mean value: 0.023029160499572755
|
|
|
|
key: test_mcc
|
|
value: [0.82507166 1. 0.78822824 0.84530217 0.93844649 0.81092385
|
|
0.93832585 0.82408564 0.87462485 0.71705182]
|
|
|
|
mean value: 0.8562060582763098
|
|
|
|
key: train_mcc
|
|
value: [0.96532513 0.96532513 0.96532513 0.96874043 0.96192098 0.96874387
|
|
0.96192609 0.96532937 0.96192609 0.96532937]
|
|
|
|
mean value: 0.9649891582347708
|
|
|
|
key: test_accuracy
|
|
value: [0.9047619 1. 0.88888889 0.92063492 0.96825397 0.9047619
|
|
0.96825397 0.9047619 0.93650794 0.85714286]
|
|
|
|
mean value: 0.9253968253968254
|
|
|
|
key: train_accuracy
|
|
value: [0.98236332 0.98236332 0.98236332 0.98412698 0.98059965 0.98412698
|
|
0.98059965 0.98236332 0.98059965 0.98236332]
|
|
|
|
mean value: 0.9821869488536155
|
|
|
|
key: test_fscore
|
|
value: [0.91176471 1. 0.89552239 0.92307692 0.96875 0.90909091
|
|
0.96969697 0.91428571 0.93939394 0.86567164]
|
|
|
|
mean value: 0.9297253191277555
|
|
|
|
key: train_fscore
|
|
value: [0.98269896 0.98269896 0.98269896 0.98440208 0.98100173 0.98434783
|
|
0.98093588 0.98263889 0.98093588 0.98263889]
|
|
|
|
mean value: 0.9824998046949579
|
|
|
|
key: test_precision
|
|
value: [0.83783784 1. 0.83333333 0.88235294 0.93939394 0.88235294
|
|
0.94117647 0.84210526 0.91176471 0.82857143]
|
|
|
|
mean value: 0.8898888861117963
|
|
|
|
key: train_precision
|
|
value: [0.96598639 0.96598639 0.96598639 0.96928328 0.96271186 0.96917808
|
|
0.96258503 0.96587031 0.96258503 0.96587031]
|
|
|
|
mean value: 0.9656043089084223
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.96774194 0.96774194 1. 0.9375
|
|
1. 1. 0.96875 0.90625 ]
|
|
|
|
mean value: 0.9747983870967742
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.90625 1. 0.89012097 0.92137097 0.96875 0.90423387
|
|
0.96774194 0.90322581 0.9359879 0.85635081]
|
|
|
|
mean value: 0.9254032258064516
|
|
|
|
key: train_roc_auc
|
|
value: [0.98233216 0.98233216 0.98233216 0.98409894 0.98056537 0.98415493
|
|
0.9806338 0.98239437 0.9806338 0.98239437]
|
|
|
|
mean value: 0.9821872044990793
|
|
|
|
key: test_jcc
|
|
value: [0.83783784 1. 0.81081081 0.85714286 0.93939394 0.83333333
|
|
0.94117647 0.84210526 0.88571429 0.76315789]
|
|
|
|
mean value: 0.8710672692716036
|
|
|
|
key: train_jcc
|
|
value: [0.96598639 0.96598639 0.96598639 0.96928328 0.96271186 0.96917808
|
|
0.96258503 0.96587031 0.96258503 0.96587031]
|
|
|
|
mean value: 0.9656043089084223
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.12211394 1.12155557 1.13342404 1.13354874 1.12435651 1.12502313
|
|
1.12549019 1.14494801 1.12875891 1.13248897]
|
|
|
|
mean value: 1.129170799255371
|
|
|
|
key: score_time
|
|
value: [0.009444 0.0094707 0.00939322 0.00949883 0.00976253 0.00935173
|
|
0.01014686 0.00918889 0.01006961 0.00922108]
|
|
|
|
mean value: 0.009554743766784668
|
|
|
|
key: test_mcc
|
|
value: [0.81644514 0.96875 0.93649194 0.84173387 0.96875 0.78160117
|
|
0.87462485 0.93832585 0.96871896 0.84173387]
|
|
|
|
mean value: 0.8937175649547898
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9047619 0.98412698 0.96825397 0.92063492 0.98412698 0.88888889
|
|
0.93650794 0.96825397 0.98412698 0.92063492]
|
|
|
|
mean value: 0.946031746031746
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.90909091 0.98412698 0.96774194 0.92063492 0.98412698 0.8852459
|
|
0.93939394 0.96969697 0.98461538 0.92063492]
|
|
|
|
mean value: 0.9465308849444227
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.96875 0.96774194 0.90625 0.96875 0.93103448
|
|
0.91176471 0.94117647 0.96969697 0.93548387]
|
|
|
|
mean value: 0.9357791292520649
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.96774194 1. 0.96774194 0.93548387 1. 0.84375
|
|
0.96875 1. 1. 0.90625 ]
|
|
|
|
mean value: 0.9589717741935484
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.90574597 0.984375 0.96824597 0.92086694 0.984375 0.88961694
|
|
0.9359879 0.96774194 0.98387097 0.92086694]
|
|
|
|
mean value: 0.9461693548387097
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83333333 0.96875 0.9375 0.85294118 0.96875 0.79411765
|
|
0.88571429 0.94117647 0.96969697 0.85294118]
|
|
|
|
mean value: 0.9004921059332824
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.42
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03364587 0.03206563 0.03213382 0.03187656 0.03195691 0.03247809
|
|
0.03221917 0.03244352 0.03221703 0.04619622]
|
|
|
|
mean value: 0.03372328281402588
|
|
|
|
key: score_time
|
|
value: [0.0128541 0.01426649 0.01441693 0.01492095 0.01492882 0.03190422
|
|
0.02246666 0.015064 0.01553226 0.0249784 ]
|
|
|
|
mean value: 0.01813328266143799
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 1. 1. 1. 0.96875 1. 1. 0.96875
|
|
1. ]
|
|
|
|
mean value: 0.99375
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 1. 1. 1. 0.98412698
|
|
1. 1. 0.98412698 1. ]
|
|
|
|
mean value: 0.9968253968253968
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 1. 1. 1. 0.98412698
|
|
1. 1. 0.98412698 1. ]
|
|
|
|
mean value: 0.9968253968253968
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 0.96875 1. 1. 0.96875
|
|
1. ]
|
|
|
|
mean value: 0.99375
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 1. 1. 1. 0.984375 1. 1.
|
|
0.984375 1. ]
|
|
|
|
mean value: 0.996875
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 1. 1. 1. 0.96875 1. 1. 0.96875
|
|
1. ]
|
|
|
|
mean value: 0.99375
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.34
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01636696 0.01627898 0.0393877 0.03948379 0.0388546 0.05069041
|
|
0.03970265 0.04008818 0.04002047 0.01647902]
|
|
|
|
mean value: 0.03373527526855469
|
|
|
|
key: score_time
|
|
value: [0.04082417 0.01194382 0.01857257 0.01204538 0.01849174 0.01922917
|
|
0.01908541 0.0209341 0.01910353 0.01232028]
|
|
|
|
mean value: 0.01925501823425293
|
|
|
|
key: test_mcc
|
|
value: [0.73343622 0.96875 0.87298387 0.85238636 0.96875 0.74722285
|
|
0.96871896 0.87988269 0.81572458 0.85168687]
|
|
|
|
mean value: 0.8659542402064062
|
|
|
|
key: train_mcc
|
|
value: [0.92051926 0.89358301 0.90678961 0.90724226 0.88650396 0.92471138
|
|
0.88982289 0.90397301 0.90640242 0.9131033 ]
|
|
|
|
mean value: 0.9052651104031916
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.98412698 0.93650794 0.92063492 0.98412698 0.87301587
|
|
0.98412698 0.93650794 0.9047619 0.92063492]
|
|
|
|
mean value: 0.9301587301587302
|
|
|
|
key: train_accuracy
|
|
value: [0.95943563 0.94532628 0.95238095 0.95238095 0.94179894 0.96119929
|
|
0.94356261 0.95061728 0.95238095 0.95590829]
|
|
|
|
mean value: 0.9514991181657848
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.98412698 0.93548387 0.92537313 0.98412698 0.87878788
|
|
0.98461538 0.94117647 0.91176471 0.92753623]
|
|
|
|
mean value: 0.9342556862699283
|
|
|
|
key: train_fscore
|
|
value: [0.96068376 0.94754653 0.95400341 0.95415959 0.94416244 0.96245734
|
|
0.94557823 0.95238095 0.95368782 0.95697074]
|
|
|
|
mean value: 0.9531630811492646
|
|
|
|
key: test_precision
|
|
value: [0.78947368 0.96875 0.93548387 0.86111111 0.96875 0.85294118
|
|
0.96969697 0.88888889 0.86111111 0.86486486]
|
|
|
|
mean value: 0.8961071677321802
|
|
|
|
key: train_precision
|
|
value: [0.93355482 0.91205212 0.92409241 0.92131148 0.90879479 0.93069307
|
|
0.91147541 0.91803279 0.92666667 0.93288591]
|
|
|
|
mean value: 0.9219559446199144
|
|
|
|
key: test_recall
|
|
value: [0.96774194 1. 0.93548387 1. 1. 0.90625
|
|
1. 1. 0.96875 1. ]
|
|
|
|
mean value: 0.9778225806451613
|
|
|
|
key: train_recall
|
|
value: [0.98943662 0.98591549 0.98591549 0.98943662 0.98239437 0.99646643
|
|
0.98233216 0.98939929 0.98233216 0.98233216]
|
|
|
|
mean value: 0.9865960782362017
|
|
|
|
key: test_roc_auc
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_rt.py:135: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_rt.py:138: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.85887097 0.984375 0.93649194 0.921875 0.984375 0.87247984
|
|
0.98387097 0.93548387 0.90372984 0.91935484]
|
|
|
|
mean value: 0.9300907258064517
|
|
|
|
key: train_roc_auc
|
|
value: [0.95938262 0.94525457 0.9523217 0.95231548 0.94172722 0.96126138
|
|
0.94363087 0.95068556 0.95243368 0.95595481]
|
|
|
|
mean value: 0.9514967899268402
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.96875 0.87878788 0.86111111 0.96875 0.78378378
|
|
0.96969697 0.88888889 0.83783784 0.86486486]
|
|
|
|
mean value: 0.8791702104202104
|
|
|
|
key: train_jcc
|
|
value: [0.92434211 0.90032154 0.91205212 0.91233766 0.89423077 0.92763158
|
|
0.89677419 0.90909091 0.91147541 0.91749175]
|
|
|
|
mean value: 0.9105748038101441
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.20769906 0.2146132 0.32142591 0.24213576 0.28507137 0.2914257
|
|
0.34106541 0.30956841 0.28638172 0.31412673]
|
|
|
|
mean value: 0.28135132789611816
|
|
|
|
key: score_time
|
|
value: [0.02348042 0.01734209 0.01882052 0.01215291 0.01882744 0.01870108
|
|
0.01888084 0.01868892 0.02401805 0.01888108]
|
|
|
|
mean value: 0.018979334831237794
|
|
|
|
key: test_mcc
|
|
value: [0.76058095 0.96875 0.87298387 0.85238636 0.93844649 0.71705182
|
|
0.96871896 0.82408564 0.81572458 0.85168687]
|
|
|
|
mean value: 0.8570415535711883
|
|
|
|
key: train_mcc
|
|
value: [0.89640037 0.89358301 0.90678961 0.90724226 0.91054677 0.93466907
|
|
0.88982289 0.88707037 0.90640242 0.9131033 ]
|
|
|
|
mean value: 0.9045630074877337
|
|
|
|
key: test_accuracy
|
|
value: [0.87301587 0.98412698 0.93650794 0.92063492 0.96825397 0.85714286
|
|
0.98412698 0.9047619 0.9047619 0.92063492]
|
|
|
|
mean value: 0.9253968253968253
|
|
|
|
key: train_accuracy
|
|
value: [0.94708995 0.94532628 0.95238095 0.95238095 0.95414462 0.9664903
|
|
0.94356261 0.94179894 0.95238095 0.95590829]
|
|
|
|
mean value: 0.9511463844797178
|
|
|
|
key: test_fscore
|
|
value: [0.88235294 0.98412698 0.93548387 0.92537313 0.96875 0.86567164
|
|
0.98461538 0.91428571 0.91176471 0.92753623]
|
|
|
|
mean value: 0.9299960609058109
|
|
|
|
key: train_fscore
|
|
value: [0.94897959 0.94754653 0.95400341 0.95415959 0.95578231 0.96740995
|
|
0.94557823 0.94416244 0.95368782 0.95697074]
|
|
|
|
mean value: 0.9528280613847903
|
|
|
|
key: test_precision
|
|
value: [0.81081081 0.96875 0.93548387 0.86111111 0.93939394 0.82857143
|
|
0.96969697 0.84210526 0.86111111 0.86486486]
|
|
|
|
mean value: 0.8881899369685873
|
|
|
|
key: train_precision
|
|
value: [0.91776316 0.91205212 0.92409241 0.92131148 0.92434211 0.94
|
|
0.91147541 0.90584416 0.92666667 0.93288591]
|
|
|
|
mean value: 0.9216433403459655
|
|
|
|
key: test_recall
|
|
value: [0.96774194 1. 0.93548387 1. 1. 0.90625
|
|
1. 1. 0.96875 1. ]
|
|
|
|
mean value: 0.9778225806451613
|
|
|
|
key: train_recall
|
|
value: [0.98239437 0.98591549 0.98591549 0.98943662 0.98943662 0.99646643
|
|
0.98233216 0.98586572 0.98233216 0.98233216]
|
|
|
|
mean value: 0.9862427213457423
|
|
|
|
key: test_roc_auc
|
|
value: [0.87449597 0.984375 0.93649194 0.921875 0.96875 0.85635081
|
|
0.98387097 0.90322581 0.90372984 0.91935484]
|
|
|
|
mean value: 0.9252520161290323
|
|
|
|
key: train_roc_auc
|
|
value: [0.94702757 0.94525457 0.9523217 0.95231548 0.95408227 0.96654307
|
|
0.94363087 0.94187652 0.95243368 0.95595481]
|
|
|
|
mean value: 0.9511440551435824
|
|
|
|
key: test_jcc
|
|
value: [0.78947368 0.96875 0.87878788 0.86111111 0.93939394 0.76315789
|
|
0.96969697 0.84210526 0.83783784 0.86486486]
|
|
|
|
mean value: 0.8715179443797865
|
|
|
|
key: train_jcc
|
|
value: [0.90291262 0.90032154 0.91205212 0.91233766 0.91530945 0.93687708
|
|
0.89677419 0.89423077 0.91147541 0.91749175]
|
|
|
|
mean value: 0.909978258882526
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03585649 0.04211831 0.03732991 0.0376308 0.03794813 0.03932977
|
|
0.03793216 0.0377214 0.03834581 0.04399371]
|
|
|
|
mean value: 0.038820648193359376
|
|
|
|
key: score_time
|
|
value: [0.01981926 0.01661086 0.01673102 0.02008748 0.01917481 0.01917219
|
|
0.01221251 0.01214361 0.01232505 0.01700068]
|
|
|
|
mean value: 0.016527748107910155
|
|
|
|
key: test_mcc
|
|
value: [0.82507166 0.93844649 0.90900317 0.88034084 0.8415746 0.82408564
|
|
0.90524194 0.82408564 0.85168687 0.82408564]
|
|
|
|
mean value: 0.8623622501382441
|
|
|
|
key: train_mcc
|
|
value: [0.89921054 0.89600606 0.89921054 0.89921054 0.89030498 0.91214664
|
|
0.88982289 0.90567804 0.90567804 0.91539508]
|
|
|
|
mean value: 0.9012663332340688
|
|
|
|
key: test_accuracy
|
|
value: [0.9047619 0.96825397 0.95238095 0.93650794 0.92063492 0.9047619
|
|
0.95238095 0.9047619 0.92063492 0.9047619 ]
|
|
|
|
mean value: 0.926984126984127
|
|
|
|
key: train_accuracy
|
|
value: [0.94708995 0.94532628 0.94708995 0.94708995 0.94356261 0.95414462
|
|
0.94356261 0.95061728 0.95061728 0.95590829]
|
|
|
|
mean value: 0.9485008818342151
|
|
|
|
key: test_fscore
|
|
value: [0.91176471 0.96875 0.95384615 0.93939394 0.91803279 0.91428571
|
|
0.95238095 0.91428571 0.92753623 0.91428571]
|
|
|
|
mean value: 0.9314561913129845
|
|
|
|
key: train_fscore
|
|
value: [0.94983278 0.94824708 0.94983278 0.94983278 0.94594595 0.95608108
|
|
0.94557823 0.95286195 0.95286195 0.95769882]
|
|
|
|
mean value: 0.950877338583359
|
|
|
|
key: test_precision
|
|
value: [0.83783784 0.93939394 0.91176471 0.88571429 0.93333333 0.84210526
|
|
0.96774194 0.84210526 0.86486486 0.84210526]
|
|
|
|
mean value: 0.8866966691984169
|
|
|
|
key: train_precision
|
|
value: [0.9044586 0.9015873 0.9044586 0.9044586 0.90909091 0.91585761
|
|
0.91147541 0.90996785 0.90996785 0.91883117]
|
|
|
|
mean value: 0.9090153882020111
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.90322581 1.
|
|
0.9375 1. 1. 1. ]
|
|
|
|
mean value: 0.9840725806451613
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 0.98591549 1.
|
|
0.98233216 1. 1. 1. ]
|
|
|
|
mean value: 0.9968247648434778
|
|
|
|
key: test_roc_auc
|
|
value: [0.90625 0.96875 0.953125 0.9375 0.9203629 0.90322581
|
|
0.95262097 0.90322581 0.91935484 0.90322581]
|
|
|
|
mean value: 0.9267641129032258
|
|
|
|
key: train_roc_auc
|
|
value: [0.94699647 0.94522968 0.94699647 0.94699647 0.94348778 0.95422535
|
|
0.94363087 0.95070423 0.95070423 0.95598592]
|
|
|
|
mean value: 0.9484957447867417
|
|
|
|
key: test_jcc
|
|
value: [0.83783784 0.93939394 0.91176471 0.88571429 0.84848485 0.84210526
|
|
0.90909091 0.84210526 0.86486486 0.84210526]
|
|
|
|
mean value: 0.8723467180742722
|
|
|
|
key: train_jcc
|
|
value: [0.9044586 0.9015873 0.9044586 0.9044586 0.8974359 0.91585761
|
|
0.89677419 0.90996785 0.90996785 0.91883117]
|
|
|
|
mean value: 0.906379765407742
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.07611585 0.89814162 0.90316176 0.9855361 0.89004588 1.00453162
|
|
0.90314054 1.08939004 0.88236618 0.91221738]
|
|
|
|
mean value: 0.9544646978378296
|
|
|
|
key: score_time
|
|
value: [0.01413941 0.0141294 0.01805401 0.01714873 0.01838374 0.01440954
|
|
0.01439524 0.01422763 0.01429391 0.01429987]
|
|
|
|
mean value: 0.015348148345947266
|
|
|
|
key: test_mcc
|
|
value: [0.88034084 0.88034084 0.90900317 0.85238636 0.93844649 0.79701677
|
|
0.96871896 0.87988269 0.87988269 0.82408564]
|
|
|
|
mean value: 0.881010445833753
|
|
|
|
key: train_mcc
|
|
value: [0.95852786 0.91211935 0.95177427 0.95514567 0.94506342 0.93840782
|
|
0.93509022 0.95178248 0.95178248 0.96192609]
|
|
|
|
mean value: 0.9461619657544672
|
|
|
|
key: test_accuracy
|
|
value: [0.93650794 0.93650794 0.95238095 0.92063492 0.96825397 0.88888889
|
|
0.98412698 0.93650794 0.93650794 0.9047619 ]
|
|
|
|
mean value: 0.9365079365079365
|
|
|
|
key: train_accuracy
|
|
value: [0.97883598 0.95414462 0.97530864 0.97707231 0.97178131 0.96825397
|
|
0.9664903 0.97530864 0.97530864 0.98059965]
|
|
|
|
mean value: 0.9723104056437389
|
|
|
|
key: test_fscore
|
|
value: [0.93939394 0.93939394 0.95384615 0.92537313 0.96875 0.90140845
|
|
0.98461538 0.94117647 0.94117647 0.91428571]
|
|
|
|
mean value: 0.9409419657744186
|
|
|
|
key: train_fscore
|
|
value: [0.97931034 0.95622896 0.97594502 0.97762478 0.97260274 0.96917808
|
|
0.96752137 0.97586207 0.97586207 0.98093588]
|
|
|
|
mean value: 0.9731071305679221
|
|
|
|
key: test_precision
|
|
value: [0.88571429 0.88571429 0.91176471 0.86111111 0.93939394 0.82051282
|
|
0.96969697 0.88888889 0.88888889 0.84210526]
|
|
|
|
mean value: 0.8893791158961437
|
|
|
|
key: train_precision
|
|
value: [0.95945946 0.91612903 0.95302013 0.95622896 0.94666667 0.94019934
|
|
0.93708609 0.95286195 0.95286195 0.96258503]
|
|
|
|
mean value: 0.947709861684225
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.9375 0.953125 0.921875 0.96875 0.88709677
|
|
0.98387097 0.93548387 0.93548387 0.90322581]
|
|
|
|
mean value: 0.9363911290322581
|
|
|
|
key: train_roc_auc
|
|
value: [0.97879859 0.9540636 0.97526502 0.9770318 0.97173145 0.96830986
|
|
0.9665493 0.97535211 0.97535211 0.9806338 ]
|
|
|
|
mean value: 0.9723087642462549
|
|
|
|
key: test_jcc
|
|
value: [0.88571429 0.88571429 0.91176471 0.86111111 0.93939394 0.82051282
|
|
0.96969697 0.88888889 0.88888889 0.84210526]
|
|
|
|
mean value: 0.8893791158961437
|
|
|
|
key: train_jcc
|
|
value: [0.95945946 0.91612903 0.95302013 0.95622896 0.94666667 0.94019934
|
|
0.93708609 0.95286195 0.95286195 0.96258503]
|
|
|
|
mean value: 0.947709861684225
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.49
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01505923 0.01232052 0.01174784 0.0115881 0.01155472 0.01149035
|
|
0.0116117 0.0120647 0.01169586 0.0116539 ]
|
|
|
|
mean value: 0.012078690528869628
|
|
|
|
key: score_time
|
|
value: [0.01687002 0.00987291 0.01007128 0.00974035 0.0097487 0.00970936
|
|
0.00976586 0.00981498 0.00976586 0.009902 ]
|
|
|
|
mean value: 0.010526132583618165
|
|
|
|
key: test_mcc
|
|
value: [0.60304557 0.64677961 0.45268961 0.62249498 0.68865372 0.64257546
|
|
0.6385282 0.61764608 0.54307539 0.61764608]
|
|
|
|
mean value: 0.6073134705828839
|
|
|
|
key: train_mcc
|
|
value: [0.59802593 0.63083513 0.65828872 0.63631047 0.6135296 0.63680058
|
|
0.61754216 0.63406994 0.64500382 0.63680058]
|
|
|
|
mean value: 0.6307206948873171
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.79365079 0.66666667 0.77777778 0.84126984 0.79365079
|
|
0.80952381 0.77777778 0.73015873 0.77777778]
|
|
|
|
mean value: 0.7746031746031746
|
|
|
|
key: train_accuracy
|
|
value: [0.77248677 0.78483245 0.80246914 0.78835979 0.77954145 0.78835979
|
|
0.78306878 0.78659612 0.79365079 0.78835979]
|
|
|
|
mean value: 0.7867724867724868
|
|
|
|
key: test_fscore
|
|
value: [0.81081081 0.82666667 0.74698795 0.81578947 0.84848485 0.83116883
|
|
0.83333333 0.82051282 0.79012346 0.82051282]
|
|
|
|
mean value: 0.8144391013771695
|
|
|
|
key: train_fscore
|
|
value: [0.81167883 0.82318841 0.83529412 0.8255814 0.81751825 0.82507289
|
|
0.81885125 0.82387191 0.82869693 0.82507289]
|
|
|
|
mean value: 0.8234826855691431
|
|
|
|
key: test_precision
|
|
value: [0.69767442 0.70454545 0.59615385 0.68888889 0.8 0.71111111
|
|
0.75 0.69565217 0.65306122 0.69565217]
|
|
|
|
mean value: 0.6992739291619835
|
|
|
|
key: train_precision
|
|
value: [0.69326683 0.69950739 0.71717172 0.7029703 0.69825436 0.70223325
|
|
0.7020202 0.70049505 0.7075 0.70223325]
|
|
|
|
mean value: 0.702565235313731
|
|
|
|
key: test_recall
|
|
value: [0.96774194 1. 1. 1. 0.90322581 1.
|
|
0.9375 1. 1. 1. ]
|
|
|
|
mean value: 0.9808467741935484
|
|
|
|
key: train_recall
|
|
value: [0.97887324 1. 1. 1. 0.98591549 1.
|
|
0.98233216 1. 1. 1. ]
|
|
|
|
mean value: 0.9947120887871398
|
|
|
|
key: test_roc_auc
|
|
value: [0.78074597 0.796875 0.671875 0.78125 0.8422379 0.79032258
|
|
0.80745968 0.77419355 0.72580645 0.77419355]
|
|
|
|
mean value: 0.7744959677419355
|
|
|
|
key: train_roc_auc
|
|
value: [0.77212213 0.7844523 0.80212014 0.78798587 0.77917683 0.78873239
|
|
0.7834196 0.78697183 0.79401408 0.78873239]
|
|
|
|
mean value: 0.7867727566814313
|
|
|
|
key: test_jcc
|
|
value: [0.68181818 0.70454545 0.59615385 0.68888889 0.73684211 0.71111111
|
|
0.71428571 0.69565217 0.65306122 0.69565217]
|
|
|
|
mean value: 0.6878010874382238
|
|
|
|
key: train_jcc
|
|
value: [0.68304668 0.69950739 0.71717172 0.7029703 0.69135802 0.70223325
|
|
0.69326683 0.70049505 0.7075 0.70223325]
|
|
|
|
mean value: 0.6999782494765374
|
|
|
|
MCC on Blind test: 0.54
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01138449 0.01064897 0.01076055 0.0105269 0.01066852 0.01089406
|
|
0.01054049 0.01054335 0.01065826 0.01042891]
|
|
|
|
mean value: 0.01070544719696045
|
|
|
|
key: score_time
|
|
value: [0.00897741 0.00890684 0.00890422 0.0089128 0.00970817 0.00971174
|
|
0.00889015 0.00894046 0.00896502 0.00892138]
|
|
|
|
mean value: 0.009083819389343262
|
|
|
|
key: test_mcc
|
|
value: [0.63159952 0.65419917 0.53874599 0.52371369 0.62325024 0.58770161
|
|
0.52419355 0.5892604 0.55611985 0.39717742]
|
|
|
|
mean value: 0.5625961436486265
|
|
|
|
key: train_mcc
|
|
value: [0.65846087 0.6249505 0.62025552 0.61927411 0.56973098 0.59788235
|
|
0.60173527 0.58730139 0.58730139 0.6120118 ]
|
|
|
|
mean value: 0.6078904173367314
|
|
|
|
key: test_accuracy
|
|
value: [0.80952381 0.82539683 0.76190476 0.76190476 0.80952381 0.79365079
|
|
0.76190476 0.79365079 0.77777778 0.6984127 ]
|
|
|
|
mean value: 0.7793650793650794
|
|
|
|
key: train_accuracy
|
|
value: [0.82716049 0.81128748 0.80952381 0.80952381 0.78483245 0.7989418
|
|
0.80070547 0.79365079 0.79365079 0.80599647]
|
|
|
|
mean value: 0.8035273368606701
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.83076923 0.7826087 0.75409836 0.79310345 0.79365079
|
|
0.76190476 0.80597015 0.78787879 0.6984127 ]
|
|
|
|
mean value: 0.7831926338218483
|
|
|
|
key: train_fscore
|
|
value: [0.83666667 0.81956155 0.81569966 0.8125 0.78368794 0.79858657
|
|
0.80347826 0.79292035 0.79292035 0.80633803]
|
|
|
|
mean value: 0.8062359389506882
|
|
|
|
key: test_precision
|
|
value: [0.75675676 0.79411765 0.71052632 0.76666667 0.85185185 0.80645161
|
|
0.77419355 0.77142857 0.76470588 0.70967742]
|
|
|
|
mean value: 0.7706376272550246
|
|
|
|
key: train_precision
|
|
value: [0.7943038 0.78640777 0.79139073 0.80136986 0.78928571 0.79858657
|
|
0.79109589 0.79432624 0.79432624 0.80350877]
|
|
|
|
mean value: 0.794460158728333
|
|
|
|
key: test_recall
|
|
value: [0.90322581 0.87096774 0.87096774 0.74193548 0.74193548 0.78125
|
|
0.75 0.84375 0.8125 0.6875 ]
|
|
|
|
mean value: 0.8004032258064516
|
|
|
|
key: train_recall
|
|
value: [0.88380282 0.8556338 0.8415493 0.82394366 0.77816901 0.79858657
|
|
0.81625442 0.79151943 0.79151943 0.80918728]
|
|
|
|
mean value: 0.8190165729358483
|
|
|
|
key: test_roc_auc
|
|
value: [0.8109879 0.82610887 0.76360887 0.76159274 0.80846774 0.79385081
|
|
0.76209677 0.79284274 0.77721774 0.69858871]
|
|
|
|
mean value: 0.7795362903225806
|
|
|
|
key: train_roc_auc
|
|
value: [0.82706042 0.81120913 0.80946723 0.80949833 0.78484422 0.79894117
|
|
0.80073284 0.79364704 0.79364704 0.80600209]
|
|
|
|
mean value: 0.803504951973324
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.71052632 0.64285714 0.60526316 0.65714286 0.65789474
|
|
0.61538462 0.675 0.65 0.53658537]
|
|
|
|
mean value: 0.645065419176459
|
|
|
|
key: train_jcc
|
|
value: [0.71919771 0.69428571 0.68876081 0.68421053 0.64431487 0.66470588
|
|
0.67151163 0.6568915 0.6568915 0.67551622]
|
|
|
|
mean value: 0.6756286349710039
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00996184 0.01097226 0.010885 0.01106524 0.0111053 0.01099801
|
|
0.01090431 0.01117992 0.0109098 0.01006746]
|
|
|
|
mean value: 0.01080491542816162
|
|
|
|
key: score_time
|
|
value: [0.01308823 0.0128336 0.01352763 0.01325631 0.01360226 0.01324463
|
|
0.01317143 0.01309395 0.01318073 0.01915669]
|
|
|
|
mean value: 0.013815546035766601
|
|
|
|
key: test_mcc
|
|
value: [0.79833297 0.82507166 0.77211 0.82507166 0.93844649 0.69290694
|
|
0.85168687 0.82408564 0.85168687 0.82408564]
|
|
|
|
mean value: 0.8203484751284951
|
|
|
|
key: train_mcc
|
|
value: [0.86443423 0.85821815 0.85512181 0.85512181 0.84587844 0.86139045
|
|
0.85211081 0.86761629 0.85828944 0.83984461]
|
|
|
|
mean value: 0.8558026032115846
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 0.9047619 0.87301587 0.9047619 0.96825397 0.82539683
|
|
0.92063492 0.9047619 0.92063492 0.9047619 ]
|
|
|
|
mean value: 0.9015873015873016
|
|
|
|
key: train_accuracy
|
|
value: [0.92768959 0.92416226 0.92239859 0.92239859 0.91710758 0.92592593
|
|
0.92063492 0.92945326 0.92416226 0.91358025]
|
|
|
|
mean value: 0.9227513227513228
|
|
|
|
key: test_fscore
|
|
value: [0.89855072 0.91176471 0.88571429 0.91176471 0.96875 0.85333333
|
|
0.92753623 0.91428571 0.92753623 0.91428571]
|
|
|
|
mean value: 0.9113521647789551
|
|
|
|
key: train_fscore
|
|
value: [0.93267652 0.92962357 0.92810458 0.92810458 0.92357724 0.93092105
|
|
0.92635025 0.9339934 0.92939245 0.9203252 ]
|
|
|
|
mean value: 0.9283068820260564
|
|
|
|
key: test_precision
|
|
value: [0.81578947 0.83783784 0.79487179 0.83783784 0.93939394 0.74418605
|
|
0.86486486 0.84210526 0.86486486 0.84210526]
|
|
|
|
mean value: 0.8383857186182767
|
|
|
|
key: train_precision
|
|
value: [0.87384615 0.86850153 0.86585366 0.86585366 0.85800604 0.87076923
|
|
0.86280488 0.87616099 0.86809816 0.85240964]
|
|
|
|
mean value: 0.8662303939860889
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.890625 0.90625 0.875 0.90625 0.96875 0.82258065
|
|
0.91935484 0.90322581 0.91935484 0.90322581]
|
|
|
|
mean value: 0.9014616935483871
|
|
|
|
key: train_roc_auc
|
|
value: [0.92756184 0.92402827 0.92226148 0.92226148 0.91696113 0.92605634
|
|
0.92077465 0.92957746 0.92429577 0.91373239]
|
|
|
|
mean value: 0.9227510824665306
|
|
|
|
key: test_jcc
|
|
value: [0.81578947 0.83783784 0.79487179 0.83783784 0.93939394 0.74418605
|
|
0.86486486 0.84210526 0.86486486 0.84210526]
|
|
|
|
mean value: 0.8383857186182767
|
|
|
|
key: train_jcc
|
|
value: [0.87384615 0.86850153 0.86585366 0.86585366 0.85800604 0.87076923
|
|
0.86280488 0.87616099 0.86809816 0.85240964]
|
|
|
|
mean value: 0.8662303939860889
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02535415 0.02325654 0.02342606 0.02537203 0.0254755 0.02365398
|
|
0.02501225 0.0255065 0.02615166 0.02310085]
|
|
|
|
mean value: 0.02463095188140869
|
|
|
|
key: score_time
|
|
value: [0.01334715 0.01307654 0.01226735 0.01226377 0.0125246 0.01222873
|
|
0.01266265 0.01354408 0.01350522 0.01280594]
|
|
|
|
mean value: 0.012822604179382325
|
|
|
|
key: test_mcc
|
|
value: [0.85238636 0.90900317 0.82507166 0.88034084 0.93844649 0.79701677
|
|
0.90873893 0.85168687 0.87988269 0.85168687]
|
|
|
|
mean value: 0.8694260665119499
|
|
|
|
key: train_mcc
|
|
value: [0.90887831 0.89921054 0.90564657 0.90242401 0.91211935 0.92192075
|
|
0.91214664 0.92192075 0.91214664 0.92519819]
|
|
|
|
mean value: 0.9121611729740694
|
|
|
|
key: test_accuracy
|
|
value: [0.92063492 0.95238095 0.9047619 0.93650794 0.96825397 0.88888889
|
|
0.95238095 0.92063492 0.93650794 0.92063492]
|
|
|
|
mean value: 0.9301587301587302
|
|
|
|
key: train_accuracy
|
|
value: [0.95238095 0.94708995 0.95061728 0.94885362 0.95414462 0.95943563
|
|
0.95414462 0.95943563 0.95414462 0.96119929]
|
|
|
|
mean value: 0.9541446208112875
|
|
|
|
key: test_fscore
|
|
value: [0.92537313 0.95384615 0.91176471 0.93939394 0.96875 0.90140845
|
|
0.95522388 0.92753623 0.94117647 0.92753623]
|
|
|
|
mean value: 0.9352009199108396
|
|
|
|
key: train_fscore
|
|
value: [0.95462185 0.94983278 0.95302013 0.95142379 0.95622896 0.96095076
|
|
0.95608108 0.96095076 0.95608108 0.96258503]
|
|
|
|
mean value: 0.9561776224900362
|
|
|
|
key: test_precision
|
|
value: [0.86111111 0.91176471 0.83783784 0.88571429 0.93939394 0.82051282
|
|
0.91428571 0.86486486 0.88888889 0.86486486]
|
|
|
|
mean value: 0.878923903335668
|
|
|
|
key: train_precision
|
|
value: [0.91318328 0.9044586 0.91025641 0.90734824 0.91612903 0.9248366
|
|
0.91585761 0.9248366 0.91585761 0.92786885]
|
|
|
|
mean value: 0.9160632829224239
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.921875 0.953125 0.90625 0.9375 0.96875 0.88709677
|
|
0.9516129 0.91935484 0.93548387 0.91935484]
|
|
|
|
mean value: 0.9300403225806452
|
|
|
|
key: train_roc_auc
|
|
value: [0.95229682 0.94699647 0.95053004 0.94876325 0.9540636 0.95950704
|
|
0.95422535 0.95950704 0.95422535 0.96126761]
|
|
|
|
mean value: 0.9541382571044642
|
|
|
|
key: test_jcc
|
|
value: [0.86111111 0.91176471 0.83783784 0.88571429 0.93939394 0.82051282
|
|
0.91428571 0.86486486 0.88888889 0.86486486]
|
|
|
|
mean value: 0.878923903335668
|
|
|
|
key: train_jcc
|
|
value: [0.91318328 0.9044586 0.91025641 0.90734824 0.91612903 0.9248366
|
|
0.91585761 0.9248366 0.91585761 0.92786885]
|
|
|
|
mean value: 0.9160632829224239
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.53
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.08596945 2.2425313 2.47848535 2.08845162 2.11341381 2.08228493
|
|
2.18272972 2.13826609 2.21471238 2.16022182]
|
|
|
|
mean value: 2.178706645965576
|
|
|
|
key: score_time
|
|
value: [0.01465487 0.02013016 0.01518297 0.01461458 0.01450729 0.01934314
|
|
0.01309943 0.01322579 0.01317143 0.0130136 ]
|
|
|
|
mean value: 0.015094327926635741
|
|
|
|
key: test_mcc
|
|
value: [0.90900317 0.96875 0.93844649 0.90900317 0.93844649 0.87988269
|
|
0.96871896 0.85168687 0.90873893 0.85168687]
|
|
|
|
mean value: 0.9124363658501387
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 0.99647883 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9996478829505658
|
|
|
|
key: test_accuracy
|
|
value: [0.95238095 0.98412698 0.96825397 0.95238095 0.96825397 0.93650794
|
|
0.98412698 0.92063492 0.95238095 0.92063492]
|
|
|
|
mean value: 0.9539682539682539
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 0.99823633 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9998236331569665
|
|
|
|
key: test_fscore
|
|
value: [0.95384615 0.98412698 0.96875 0.95384615 0.96875 0.94117647
|
|
0.98461538 0.92753623 0.95522388 0.92753623]
|
|
|
|
mean value: 0.9565407491388043
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 0.99824253 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9998242530755712
|
|
|
|
key: test_precision
|
|
value: [0.91176471 0.96875 0.93939394 0.91176471 0.93939394 0.88888889
|
|
0.96969697 0.86486486 0.91428571 0.86486486]
|
|
|
|
mean value: 0.9173668593153887
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 0.99649123 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9996491228070176
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.953125 0.984375 0.96875 0.953125 0.96875 0.93548387
|
|
0.98387097 0.91935484 0.9516129 0.91935484]
|
|
|
|
mean value: 0.9537802419354839
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 0.99823322 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9998233215547703
|
|
|
|
key: test_jcc
|
|
value: [0.91176471 0.96875 0.93939394 0.91176471 0.93939394 0.88888889
|
|
0.96969697 0.86486486 0.91428571 0.86486486]
|
|
|
|
mean value: 0.9173668593153887
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 0.99649123 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9996491228070176
|
|
|
|
MCC on Blind test: 0.2
|
|
|
|
Accuracy on Blind test: 0.45
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03869104 0.0326786 0.03572583 0.03531909 0.03040981 0.03377557
|
|
0.03325844 0.03369021 0.04008651 0.03066468]
|
|
|
|
mean value: 0.03442997932434082
|
|
|
|
key: score_time
|
|
value: [0.01288891 0.01198602 0.010216 0.00937152 0.00969505 0.01157212
|
|
0.00913906 0.01154947 0.01000953 0.01034784]
|
|
|
|
mean value: 0.010677552223205567
|
|
|
|
key: test_mcc
|
|
value: [0.93844649 0.96875 0.88034084 0.96875 0.93844649 0.93832585
|
|
0.90873893 0.96871896 0.90873893 0.82408564]
|
|
|
|
mean value: 0.924334215315277
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96825397 0.98412698 0.93650794 0.98412698 0.96825397 0.96825397
|
|
0.95238095 0.98412698 0.95238095 0.9047619 ]
|
|
|
|
mean value: 0.9603174603174602
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96875 0.98412698 0.93939394 0.98412698 0.96875 0.96969697
|
|
0.95522388 0.98461538 0.95522388 0.91428571]
|
|
|
|
mean value: 0.9624193737440007
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.93939394 0.96875 0.88571429 0.96875 0.93939394 0.94117647
|
|
0.91428571 0.96969697 0.91428571 0.84210526]
|
|
|
|
mean value: 0.9283552296516693
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96875 0.984375 0.9375 0.984375 0.96875 0.96774194
|
|
0.9516129 0.98387097 0.9516129 0.90322581]
|
|
|
|
mean value: 0.9601814516129032
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.93939394 0.96875 0.88571429 0.96875 0.93939394 0.94117647
|
|
0.91428571 0.96969697 0.91428571 0.84210526]
|
|
|
|
mean value: 0.9283552296516693
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.43
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.12414789 0.12790179 0.13315892 0.13366532 0.1253016 0.11637568
|
|
0.11803651 0.11804795 0.11743784 0.11930633]
|
|
|
|
mean value: 0.12333798408508301
|
|
|
|
key: score_time
|
|
value: [0.01952243 0.01906157 0.02006102 0.02045536 0.01763082 0.01792073
|
|
0.01782751 0.01823306 0.01890826 0.01866674]
|
|
|
|
mean value: 0.018828749656677246
|
|
|
|
key: test_mcc
|
|
value: [0.93844649 1. 1. 0.96875 1. 0.93832585
|
|
1. 0.93832585 0.96871896 0.96871896]
|
|
|
|
mean value: 0.972128611836196
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96825397 1. 1. 0.98412698 1. 0.96825397
|
|
1. 0.96825397 0.98412698 0.98412698]
|
|
|
|
mean value: 0.9857142857142857
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96875 1. 1. 0.98412698 1. 0.96969697
|
|
1. 0.96969697 0.98461538 0.98461538]
|
|
|
|
mean value: 0.9861501692751693
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.93939394 1. 1. 0.96875 1. 0.94117647
|
|
1. 0.94117647 0.96969697 0.96969697]
|
|
|
|
mean value: 0.972989081996435
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96875 1. 1. 0.984375 1. 0.96774194
|
|
1. 0.96774194 0.98387097 0.98387097]
|
|
|
|
mean value: 0.9856350806451613
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.93939394 1. 1. 0.96875 1. 0.94117647
|
|
1. 0.94117647 0.96969697 0.96969697]
|
|
|
|
mean value: 0.972989081996435
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.38
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0120995 0.01139975 0.01160812 0.01103592 0.01151562 0.0115397
|
|
0.01082873 0.01184201 0.01139522 0.01096535]
|
|
|
|
mean value: 0.011422991752624512
|
|
|
|
key: score_time
|
|
value: [0.00960779 0.00969839 0.00928497 0.00940084 0.00971532 0.009619
|
|
0.00971985 0.00964451 0.00917172 0.00926471]
|
|
|
|
mean value: 0.009512710571289062
|
|
|
|
key: test_mcc
|
|
value: [0.88034084 1. 0.93844649 0.90900317 0.96875 0.90873893
|
|
0.90873893 0.93832585 0.93832585 0.87988269]
|
|
|
|
mean value: 0.9270552773277502
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.93650794 1. 0.96825397 0.95238095 0.98412698 0.95238095
|
|
0.95238095 0.96825397 0.96825397 0.93650794]
|
|
|
|
mean value: 0.9619047619047619
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93939394 1. 0.96875 0.95384615 0.98412698 0.95522388
|
|
0.95522388 0.96969697 0.96969697 0.94117647]
|
|
|
|
mean value: 0.9637135248543283
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88571429 1. 0.93939394 0.91176471 0.96875 0.91428571
|
|
0.91428571 0.94117647 0.94117647 0.88888889]
|
|
|
|
mean value: 0.9305436189627366
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 1. 0.96875 0.953125 0.984375 0.9516129
|
|
0.9516129 0.96774194 0.96774194 0.93548387]
|
|
|
|
mean value: 0.9617943548387097
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.88571429 1. 0.93939394 0.91176471 0.96875 0.91428571
|
|
0.91428571 0.94117647 0.94117647 0.88888889]
|
|
|
|
mean value: 0.9305436189627366
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.42
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.71363235 1.71835566 1.72833467 1.70863962 1.71910572 1.6981771
|
|
1.71010995 1.70283628 1.7230835 1.73624706]
|
|
|
|
mean value: 1.715852189064026
|
|
|
|
key: score_time
|
|
value: [0.09295297 0.09422278 0.09349656 0.09335542 0.09485149 0.09592009
|
|
0.09276414 0.09293246 0.09736371 0.09843373]
|
|
|
|
mean value: 0.09462933540344239
|
|
|
|
key: test_mcc
|
|
value: [0.88034084 0.96875 1. 0.96875 1. 0.90873893
|
|
1. 0.93832585 0.96871896 0.90873893]
|
|
|
|
mean value: 0.9542363525898571
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.93650794 0.98412698 1. 0.98412698 1. 0.95238095
|
|
1. 0.96825397 0.98412698 0.95238095]
|
|
|
|
mean value: 0.9761904761904762
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93939394 0.98412698 1. 0.98412698 1. 0.95522388
|
|
1. 0.96969697 0.98461538 0.95522388]
|
|
|
|
mean value: 0.9772408023154292
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88571429 0.96875 1. 0.96875 1. 0.91428571
|
|
1. 0.94117647 0.96969697 0.91428571]
|
|
|
|
mean value: 0.9562659154570919
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.984375 1. 0.984375 1. 0.9516129
|
|
1. 0.96774194 0.98387097 0.9516129 ]
|
|
|
|
mean value: 0.9761088709677419
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.88571429 0.96875 1. 0.96875 1. 0.91428571
|
|
1. 0.94117647 0.96969697 0.91428571]
|
|
|
|
mean value: 0.9562659154570919
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.37
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.00334644 0.97311354 0.97188091 0.96236587 1.01098251 0.96165252
|
|
0.9656539 1.00930166 1.0024457 0.99207234]
|
|
|
|
mean value: 0.9852815389633178
|
|
|
|
key: score_time
|
|
value: [0.28337717 0.24201965 0.28900814 0.20848203 0.29436612 0.27408195
|
|
0.27980685 0.2509625 0.15058684 0.34476352]
|
|
|
|
mean value: 0.2617454767227173
|
|
|
|
key: test_mcc
|
|
value: [0.85238636 0.93844649 0.93844649 0.90900317 1. 0.82408564
|
|
0.96871896 0.90873893 0.96871896 0.87988269]
|
|
|
|
mean value: 0.9188427697924966
|
|
|
|
key: train_mcc
|
|
value: [0.972167 0.972167 0.96532513 0.972167 0.96532513 0.96874387
|
|
0.96532937 0.97560706 0.96874387 0.96874387]
|
|
|
|
mean value: 0.9694319288767332
|
|
|
|
key: test_accuracy
|
|
value: [0.92063492 0.96825397 0.96825397 0.95238095 1. 0.9047619
|
|
0.98412698 0.95238095 0.98412698 0.93650794]
|
|
|
|
mean value: 0.9571428571428571
|
|
|
|
key: train_accuracy
|
|
value: [0.98589065 0.98589065 0.98236332 0.98589065 0.98236332 0.98412698
|
|
0.98236332 0.98765432 0.98412698 0.98412698]
|
|
|
|
mean value: 0.9844797178130511
|
|
|
|
key: test_fscore
|
|
value: [0.92537313 0.96875 0.96875 0.95384615 1. 0.91428571
|
|
0.98461538 0.95522388 0.98461538 0.94117647]
|
|
|
|
mean value: 0.9596636122876246
|
|
|
|
key: train_fscore
|
|
value: [0.98611111 0.98611111 0.98269896 0.98611111 0.98269896 0.98434783
|
|
0.98263889 0.9877836 0.98434783 0.98434783]
|
|
|
|
mean value: 0.9847197219471963
|
|
|
|
key: test_precision
|
|
value: [0.86111111 0.93939394 0.93939394 0.91176471 1. 0.84210526
|
|
0.96969697 0.91428571 0.96969697 0.88888889]
|
|
|
|
mean value: 0.923633750150778
|
|
|
|
key: train_precision
|
|
value: [0.97260274 0.97260274 0.96598639 0.97260274 0.96598639 0.96917808
|
|
0.96587031 0.97586207 0.96917808 0.96917808]
|
|
|
|
mean value: 0.9699047631001824
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.921875 0.96875 0.96875 0.953125 1. 0.90322581
|
|
0.98387097 0.9516129 0.98387097 0.93548387]
|
|
|
|
mean value: 0.9570564516129032
|
|
|
|
key: train_roc_auc
|
|
value: [0.98586572 0.98586572 0.98233216 0.98586572 0.98233216 0.98415493
|
|
0.98239437 0.98767606 0.98415493 0.98415493]
|
|
|
|
mean value: 0.9844796695366546
|
|
|
|
key: test_jcc
|
|
value: [0.86111111 0.93939394 0.93939394 0.91176471 1. 0.84210526
|
|
0.96969697 0.91428571 0.96969697 0.88888889]
|
|
|
|
mean value: 0.923633750150778
|
|
|
|
key: train_jcc
|
|
value: [0.97260274 0.97260274 0.96598639 0.97260274 0.96598639 0.96917808
|
|
0.96587031 0.97586207 0.96917808 0.96917808]
|
|
|
|
mean value: 0.9699047631001824
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.42
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02652526 0.01135445 0.01183033 0.01073074 0.01137686 0.01203132
|
|
0.01070952 0.0118649 0.01197577 0.01080942]
|
|
|
|
mean value: 0.012920856475830078
|
|
|
|
key: score_time
|
|
value: [0.0096941 0.00950265 0.00960016 0.00941467 0.00990725 0.00982475
|
|
0.00936031 0.00984359 0.00971794 0.0090003 ]
|
|
|
|
mean value: 0.009586572647094727
|
|
|
|
key: test_mcc
|
|
value: [0.63159952 0.65419917 0.53874599 0.52371369 0.62325024 0.58770161
|
|
0.52419355 0.5892604 0.55611985 0.39717742]
|
|
|
|
mean value: 0.5625961436486265
|
|
|
|
key: train_mcc
|
|
value: [0.65846087 0.6249505 0.62025552 0.61927411 0.56973098 0.59788235
|
|
0.60173527 0.58730139 0.58730139 0.6120118 ]
|
|
|
|
mean value: 0.6078904173367314
|
|
|
|
key: test_accuracy
|
|
value: [0.80952381 0.82539683 0.76190476 0.76190476 0.80952381 0.79365079
|
|
0.76190476 0.79365079 0.77777778 0.6984127 ]
|
|
|
|
mean value: 0.7793650793650794
|
|
|
|
key: train_accuracy
|
|
value: [0.82716049 0.81128748 0.80952381 0.80952381 0.78483245 0.7989418
|
|
0.80070547 0.79365079 0.79365079 0.80599647]
|
|
|
|
mean value: 0.8035273368606701
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.83076923 0.7826087 0.75409836 0.79310345 0.79365079
|
|
0.76190476 0.80597015 0.78787879 0.6984127 ]
|
|
|
|
mean value: 0.7831926338218483
|
|
|
|
key: train_fscore
|
|
value: [0.83666667 0.81956155 0.81569966 0.8125 0.78368794 0.79858657
|
|
0.80347826 0.79292035 0.79292035 0.80633803]
|
|
|
|
mean value: 0.8062359389506882
|
|
|
|
key: test_precision
|
|
value: [0.75675676 0.79411765 0.71052632 0.76666667 0.85185185 0.80645161
|
|
0.77419355 0.77142857 0.76470588 0.70967742]
|
|
|
|
mean value: 0.7706376272550246
|
|
|
|
key: train_precision
|
|
value: [0.7943038 0.78640777 0.79139073 0.80136986 0.78928571 0.79858657
|
|
0.79109589 0.79432624 0.79432624 0.80350877]
|
|
|
|
mean value: 0.794460158728333
|
|
|
|
key: test_recall
|
|
value: [0.90322581 0.87096774 0.87096774 0.74193548 0.74193548 0.78125
|
|
0.75 0.84375 0.8125 0.6875 ]
|
|
|
|
mean value: 0.8004032258064516
|
|
|
|
key: train_recall
|
|
value: [0.88380282 0.8556338 0.8415493 0.82394366 0.77816901 0.79858657
|
|
0.81625442 0.79151943 0.79151943 0.80918728]
|
|
|
|
mean value: 0.8190165729358483
|
|
|
|
key: test_roc_auc
|
|
value: [0.8109879 0.82610887 0.76360887 0.76159274 0.80846774 0.79385081
|
|
0.76209677 0.79284274 0.77721774 0.69858871]
|
|
|
|
mean value: 0.7795362903225806
|
|
|
|
key: train_roc_auc
|
|
value: [0.82706042 0.81120913 0.80946723 0.80949833 0.78484422 0.79894117
|
|
0.80073284 0.79364704 0.79364704 0.80600209]
|
|
|
|
mean value: 0.803504951973324
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.71052632 0.64285714 0.60526316 0.65714286 0.65789474
|
|
0.61538462 0.675 0.65 0.53658537]
|
|
|
|
mean value: 0.645065419176459
|
|
|
|
key: train_jcc
|
|
value: [0.71919771 0.69428571 0.68876081 0.68421053 0.64431487 0.66470588
|
|
0.67151163 0.6568915 0.6568915 0.67551622]
|
|
|
|
mean value: 0.6756286349710039
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.10989952 0.08649611 0.09431887 0.09879065 0.0862627 0.0911324
|
|
0.08850598 0.09136868 0.24701285 0.08706379]
|
|
|
|
mean value: 0.10808515548706055
|
|
|
|
key: score_time
|
|
value: [0.01123381 0.01106572 0.01129341 0.01121545 0.01135254 0.01113534
|
|
0.01092839 0.01113176 0.01107979 0.01111746]
|
|
|
|
mean value: 0.011155366897583008
|
|
|
|
key: test_mcc
|
|
value: [0.82507166 0.96875 0.96875 0.93844649 0.96875 0.90873893
|
|
1. 0.96871896 0.93832585 0.90873893]
|
|
|
|
mean value: 0.9394290834187049
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9047619 0.98412698 0.98412698 0.96825397 0.98412698 0.95238095
|
|
1. 0.98412698 0.96825397 0.95238095]
|
|
|
|
mean value: 0.9682539682539683
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.91176471 0.98412698 0.98412698 0.96875 0.98412698 0.95522388
|
|
1. 0.98461538 0.96969697 0.95522388]
|
|
|
|
mean value: 0.969765577376969
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83783784 0.96875 0.96875 0.93939394 0.96875 0.91428571
|
|
1. 0.96969697 0.94117647 0.91428571]
|
|
|
|
mean value: 0.9422926646088411
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.90625 0.984375 0.984375 0.96875 0.984375 0.9516129
|
|
1. 0.98387097 0.96774194 0.9516129 ]
|
|
|
|
mean value: 0.9682963709677419
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.83783784 0.96875 0.96875 0.93939394 0.96875 0.91428571
|
|
1. 0.96969697 0.94117647 0.91428571]
|
|
|
|
mean value: 0.9422926646088411
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.4
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.04713345 0.0557878 0.04739308 0.07503843 0.06757092 0.07611299
|
|
0.06051421 0.08022571 0.05500579 0.05546594]
|
|
|
|
mean value: 0.062024831771850586
|
|
|
|
key: score_time
|
|
value: [0.01955056 0.01227689 0.01909924 0.01221609 0.01222396 0.01253867
|
|
0.01922131 0.01922393 0.012182 0.01211929]
|
|
|
|
mean value: 0.015065193176269531
|
|
|
|
key: test_mcc
|
|
value: [0.72098341 0.85238636 0.82507166 0.82507166 0.90900317 0.71842121
|
|
0.85168687 0.79701677 0.85168687 0.69290694]
|
|
|
|
mean value: 0.8044234921578046
|
|
|
|
key: train_mcc
|
|
value: [0.91211935 0.88962366 0.90887831 0.89600606 0.89600606 0.91214664
|
|
0.89924646 0.90567804 0.89604431 0.91865308]
|
|
|
|
mean value: 0.9034401951269692
|
|
|
|
key: test_accuracy
|
|
value: [0.84126984 0.92063492 0.9047619 0.9047619 0.95238095 0.84126984
|
|
0.92063492 0.88888889 0.92063492 0.82539683]
|
|
|
|
mean value: 0.892063492063492
|
|
|
|
key: train_accuracy
|
|
value: [0.95414462 0.94179894 0.95238095 0.94532628 0.94532628 0.95414462
|
|
0.94708995 0.95061728 0.94532628 0.95767196]
|
|
|
|
mean value: 0.9493827160493827
|
|
|
|
key: test_fscore
|
|
value: [0.86111111 0.92537313 0.91176471 0.91176471 0.95384615 0.86486486
|
|
0.92753623 0.90140845 0.92753623 0.85333333]
|
|
|
|
mean value: 0.9038538923720869
|
|
|
|
key: train_fscore
|
|
value: [0.95622896 0.94509151 0.95462185 0.94824708 0.94824708 0.95608108
|
|
0.94966443 0.95286195 0.9480737 0.95932203]
|
|
|
|
mean value: 0.9518439675253847
|
|
|
|
key: test_precision
|
|
value: [0.75609756 0.86111111 0.83783784 0.83783784 0.91176471 0.76190476
|
|
0.86486486 0.82051282 0.86486486 0.74418605]
|
|
|
|
mean value: 0.826098241230369
|
|
|
|
key: train_precision
|
|
value: [0.91612903 0.89589905 0.91318328 0.9015873 0.9015873 0.91585761
|
|
0.90415335 0.90996785 0.90127389 0.9218241 ]
|
|
|
|
mean value: 0.9081462763857785
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.84375 0.921875 0.90625 0.90625 0.953125 0.83870968
|
|
0.91935484 0.88709677 0.91935484 0.82258065]
|
|
|
|
mean value: 0.8918346774193548
|
|
|
|
key: train_roc_auc
|
|
value: [0.9540636 0.94169611 0.95229682 0.94522968 0.94522968 0.95422535
|
|
0.9471831 0.95070423 0.94542254 0.95774648]
|
|
|
|
mean value: 0.9493797591200916
|
|
|
|
key: test_jcc
|
|
value: [0.75609756 0.86111111 0.83783784 0.83783784 0.91176471 0.76190476
|
|
0.86486486 0.82051282 0.86486486 0.74418605]
|
|
|
|
mean value: 0.826098241230369
|
|
|
|
key: train_jcc
|
|
value: [0.91612903 0.89589905 0.91318328 0.9015873 0.9015873 0.91585761
|
|
0.90415335 0.90996785 0.90127389 0.9218241 ]
|
|
|
|
mean value: 0.9081462763857785
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.53
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01452565 0.01298976 0.01056457 0.01098371 0.01002884 0.01002645
|
|
0.01004529 0.01023436 0.01015925 0.01043272]
|
|
|
|
mean value: 0.010999059677124024
|
|
|
|
key: score_time
|
|
value: [0.01194215 0.00911331 0.00909233 0.00875807 0.00872374 0.0087707
|
|
0.00882459 0.00869441 0.00901484 0.00964642]
|
|
|
|
mean value: 0.009258055686950683
|
|
|
|
key: test_mcc
|
|
value: [0.73343622 0.74634526 0.62249498 0.6712536 0.65991202 0.70447456
|
|
0.66625621 0.69290694 0.58371723 0.64257546]
|
|
|
|
mean value: 0.6723372484820773
|
|
|
|
key: train_mcc
|
|
value: [0.6807355 0.67620933 0.68201852 0.68784156 0.69271804 0.68394634
|
|
0.69423308 0.68523763 0.68991641 0.68814449]
|
|
|
|
mean value: 0.6861000905144914
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.85714286 0.77777778 0.80952381 0.82539683 0.84126984
|
|
0.82539683 0.82539683 0.77777778 0.79365079]
|
|
|
|
mean value: 0.819047619047619
|
|
|
|
key: train_accuracy
|
|
value: [0.82363316 0.82186949 0.82539683 0.82892416 0.82892416 0.82539683
|
|
0.83068783 0.82716049 0.83421517 0.82892416]
|
|
|
|
mean value: 0.8275132275132275
|
|
|
|
key: test_fscore
|
|
value: [0.86956522 0.87323944 0.81578947 0.83783784 0.8358209 0.86111111
|
|
0.84507042 0.85333333 0.81081081 0.83116883]
|
|
|
|
mean value: 0.8433747370014757
|
|
|
|
key: train_fscore
|
|
value: [0.84756098 0.84580153 0.84839204 0.85099846 0.85235921 0.84839204
|
|
0.85276074 0.84923077 0.85220126 0.85053929]
|
|
|
|
mean value: 0.8498236302765467
|
|
|
|
key: test_precision
|
|
value: [0.78947368 0.775 0.68888889 0.72093023 0.77777778 0.775
|
|
0.76923077 0.74418605 0.71428571 0.71111111]
|
|
|
|
mean value: 0.7465884224574555
|
|
|
|
key: train_precision
|
|
value: [0.74731183 0.74663073 0.75067751 0.75476839 0.75067024 0.74864865
|
|
0.75338753 0.7520436 0.76770538 0.75409836]
|
|
|
|
mean value: 0.7525942218498527
|
|
|
|
key: test_recall
|
|
value: [0.96774194 1. 1. 1. 0.90322581 0.96875
|
|
0.9375 1. 0.9375 1. ]
|
|
|
|
mean value: 0.9714717741935484
|
|
|
|
key: train_recall
|
|
value: [0.97887324 0.97535211 0.97535211 0.97535211 0.98591549 0.97879859
|
|
0.98233216 0.97526502 0.95759717 0.97526502]
|
|
|
|
mean value: 0.9760103020952571
|
|
|
|
key: test_roc_auc
|
|
value: [0.85887097 0.859375 0.78125 0.8125 0.8266129 0.83921371
|
|
0.82358871 0.82258065 0.77520161 0.79032258]
|
|
|
|
mean value: 0.8189516129032258
|
|
|
|
key: train_roc_auc
|
|
value: [0.82335888 0.82159832 0.82513189 0.82866546 0.82864679 0.8256669
|
|
0.83095481 0.82742124 0.83443239 0.8291818 ]
|
|
|
|
mean value: 0.8275058478076942
|
|
|
|
key: test_jcc
|
|
value: [0.76923077 0.775 0.68888889 0.72093023 0.71794872 0.75609756
|
|
0.73170732 0.74418605 0.68181818 0.71111111]
|
|
|
|
mean value: 0.7296918826116217
|
|
|
|
key: train_jcc
|
|
value: [0.73544974 0.73280423 0.73670213 0.74064171 0.74270557 0.73670213
|
|
0.74331551 0.73796791 0.74246575 0.73994638]
|
|
|
|
mean value: 0.7388701061676443
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02136731 0.01917386 0.0191288 0.02424407 0.02366519 0.02134633
|
|
0.02548814 0.0253067 0.02365065 0.02221847]
|
|
|
|
mean value: 0.022558951377868654
|
|
|
|
key: score_time
|
|
value: [0.00999188 0.01116729 0.01179528 0.01189232 0.01184058 0.01699877
|
|
0.01195812 0.01211953 0.01199746 0.01622009]
|
|
|
|
mean value: 0.012598133087158203
|
|
|
|
key: test_mcc
|
|
value: [0.84530217 0.90900317 0.88034084 0.79833297 0.8415746 0.77042092
|
|
0.87298387 0.85168687 0.93832585 0.82408564]
|
|
|
|
mean value: 0.8532056905123313
|
|
|
|
key: train_mcc
|
|
value: [0.92981985 0.90242401 0.86443423 0.83365727 0.92689488 0.89924646
|
|
0.9131033 0.92192075 0.94173574 0.87387454]
|
|
|
|
mean value: 0.9007111018874211
|
|
|
|
key: test_accuracy
|
|
value: [0.92063492 0.95238095 0.93650794 0.88888889 0.92063492 0.87301587
|
|
0.93650794 0.92063492 0.96825397 0.9047619 ]
|
|
|
|
mean value: 0.9222222222222222
|
|
|
|
key: train_accuracy
|
|
value: [0.96472663 0.94885362 0.92768959 0.91005291 0.96296296 0.94708995
|
|
0.95590829 0.95943563 0.97001764 0.9329806 ]
|
|
|
|
mean value: 0.9479717813051146
|
|
|
|
key: test_fscore
|
|
value: [0.92307692 0.95384615 0.93939394 0.89855072 0.91803279 0.88888889
|
|
0.9375 0.92753623 0.96969697 0.91428571]
|
|
|
|
mean value: 0.9270808332595574
|
|
|
|
key: train_fscore
|
|
value: [0.96527778 0.95142379 0.93267652 0.91760905 0.96385542 0.94966443
|
|
0.95697074 0.96095076 0.97084048 0.93708609]
|
|
|
|
mean value: 0.9506355057422275
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.91176471 0.88571429 0.81578947 0.93333333 0.8
|
|
0.9375 0.86486486 0.94117647 0.84210526]
|
|
|
|
mean value: 0.8814601338401648
|
|
|
|
key: train_precision
|
|
value: [0.95205479 0.90734824 0.87384615 0.84776119 0.94276094 0.90415335
|
|
0.93288591 0.9248366 0.94333333 0.88161994]
|
|
|
|
mean value: 0.9110600460977081
|
|
|
|
key: test_recall
|
|
value: [0.96774194 1. 1. 1. 0.90322581 1.
|
|
0.9375 1. 1. 1. ]
|
|
|
|
mean value: 0.9808467741935484
|
|
|
|
key: train_recall
|
|
value: [0.97887324 1. 1. 1. 0.98591549 1.
|
|
0.98233216 1. 1. 1. ]
|
|
|
|
mean value: 0.9947120887871398
|
|
|
|
key: test_roc_auc
|
|
value: [0.92137097 0.953125 0.9375 0.890625 0.9203629 0.87096774
|
|
0.93649194 0.91935484 0.96774194 0.90322581]
|
|
|
|
mean value: 0.9220766129032258
|
|
|
|
key: train_roc_auc
|
|
value: [0.96470164 0.94876325 0.92756184 0.90989399 0.96292241 0.9471831
|
|
0.95595481 0.95950704 0.97007042 0.93309859]
|
|
|
|
mean value: 0.9479657094510526
|
|
|
|
key: test_jcc
|
|
value: [0.85714286 0.91176471 0.88571429 0.81578947 0.84848485 0.8
|
|
0.88235294 0.86486486 0.94117647 0.84210526]
|
|
|
|
mean value: 0.8649395710696021
|
|
|
|
key: train_jcc
|
|
value: [0.93288591 0.90734824 0.87384615 0.84776119 0.93023256 0.90415335
|
|
0.91749175 0.9248366 0.94333333 0.88161994]
|
|
|
|
mean value: 0.9063509031010042
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02395439 0.01864457 0.02186561 0.02039933 0.02593637 0.02293134
|
|
0.02203345 0.02455306 0.02218056 0.02344394]
|
|
|
|
mean value: 0.022594261169433593
|
|
|
|
key: score_time
|
|
value: [0.01191592 0.01185274 0.01183462 0.01186132 0.01182365 0.01181459
|
|
0.01181173 0.01192665 0.01187754 0.01197457]
|
|
|
|
mean value: 0.011869335174560547
|
|
|
|
key: test_mcc
|
|
value: [0.79833297 0.90524194 0.85238636 0.90900317 0.87462485 0.74424094
|
|
0.93832585 0.80947581 0.82408564 0.77042092]
|
|
|
|
mean value: 0.8426138438007634
|
|
|
|
key: train_mcc
|
|
value: [0.83975339 0.88364483 0.86443423 0.8874727 0.88748387 0.88966677
|
|
0.92519819 0.90603937 0.86139045 0.90567804]
|
|
|
|
mean value: 0.885076182699108
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 0.95238095 0.92063492 0.95238095 0.93650794 0.85714286
|
|
0.96825397 0.9047619 0.9047619 0.87301587]
|
|
|
|
mean value: 0.9158730158730158
|
|
|
|
key: train_accuracy
|
|
value: [0.91358025 0.94179894 0.92768959 0.94356261 0.94356261 0.94179894
|
|
0.96119929 0.95238095 0.92592593 0.95061728]
|
|
|
|
mean value: 0.9402116402116402
|
|
|
|
key: test_fscore
|
|
value: [0.89855072 0.95238095 0.92537313 0.95384615 0.93333333 0.87671233
|
|
0.96969697 0.90625 0.91428571 0.88888889]
|
|
|
|
mean value: 0.9219318200165175
|
|
|
|
key: train_fscore
|
|
value: [0.92058347 0.94220665 0.93267652 0.94444444 0.94285714 0.94490818
|
|
0.96258503 0.9535284 0.93092105 0.95286195]
|
|
|
|
mean value: 0.9427572848690878
|
|
|
|
key: test_precision
|
|
value: [0.81578947 0.9375 0.86111111 0.91176471 0.96551724 0.7804878
|
|
0.94117647 0.90625 0.84210526 0.8 ]
|
|
|
|
mean value: 0.8761702070681163
|
|
|
|
key: train_precision
|
|
value: [0.85285285 0.93728223 0.87384615 0.93150685 0.95652174 0.89556962
|
|
0.92786885 0.9295302 0.87076923 0.90996785]
|
|
|
|
mean value: 0.9085715575592525
|
|
|
|
key: test_recall
|
|
value: [1. 0.96774194 1. 1. 0.90322581 1.
|
|
1. 0.90625 1. 1. ]
|
|
|
|
mean value: 0.9777217741935484
|
|
|
|
key: train_recall
|
|
value: [1. 0.9471831 1. 0.95774648 0.92957746 1.
|
|
1. 0.97879859 1. 1. ]
|
|
|
|
mean value: 0.9813305628825959
|
|
|
|
key: test_roc_auc
|
|
value: [0.890625 0.95262097 0.921875 0.953125 0.9359879 0.85483871
|
|
0.96774194 0.9047379 0.90322581 0.87096774]
|
|
|
|
mean value: 0.9155745967741935
|
|
|
|
key: train_roc_auc
|
|
value: [0.91342756 0.94178943 0.92756184 0.94353755 0.94358732 0.94190141
|
|
0.96126761 0.95242746 0.92605634 0.95070423]
|
|
|
|
mean value: 0.9402260737570298
|
|
|
|
key: test_jcc
|
|
value: [0.81578947 0.90909091 0.86111111 0.91176471 0.875 0.7804878
|
|
0.94117647 0.82857143 0.84210526 0.8 ]
|
|
|
|
mean value: 0.8565097166964191
|
|
|
|
key: train_jcc
|
|
value: [0.85285285 0.89072848 0.87384615 0.89473684 0.89189189 0.89556962
|
|
0.92786885 0.91118421 0.87076923 0.90996785]
|
|
|
|
mean value: 0.8919415977184245
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.44
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.19980049 0.19275093 0.19083118 0.19255972 0.18906665 0.19009423
|
|
0.19072938 0.19309402 0.19299769 0.19240642]
|
|
|
|
mean value: 0.19243307113647462
|
|
|
|
key: score_time
|
|
value: [0.01648927 0.01677728 0.0169611 0.0167799 0.01661658 0.01674891
|
|
0.0165782 0.01664019 0.01642776 0.01645136]
|
|
|
|
mean value: 0.01664705276489258
|
|
|
|
key: test_mcc
|
|
value: [0.88034084 0.93844649 0.90900317 0.93844649 0.96875 0.79701677
|
|
0.93832585 0.93832585 0.87988269 0.87988269]
|
|
|
|
mean value: 0.9068420855945698
|
|
|
|
key: train_mcc
|
|
value: [0.99647883 0.99647883 0.98947316 0.99647883 0.99296993 0.98947355
|
|
0.9929701 1. 0.99647887 0.9929701 ]
|
|
|
|
mean value: 0.9943772208103349
|
|
|
|
key: test_accuracy
|
|
value: [0.93650794 0.96825397 0.95238095 0.96825397 0.98412698 0.88888889
|
|
0.96825397 0.96825397 0.93650794 0.93650794]
|
|
|
|
mean value: 0.9507936507936507
|
|
|
|
key: train_accuracy
|
|
value: [0.99823633 0.99823633 0.99470899 0.99823633 0.99647266 0.99470899
|
|
0.99647266 1. 0.99823633 0.99647266]
|
|
|
|
mean value: 0.9971781305114638
|
|
|
|
key: test_fscore
|
|
value: [0.93939394 0.96875 0.95384615 0.96875 0.98412698 0.90140845
|
|
0.96969697 0.96969697 0.94117647 0.94117647]
|
|
|
|
mean value: 0.9538022408641713
|
|
|
|
key: train_fscore
|
|
value: [0.99824253 0.99824253 0.99474606 0.99824253 0.99649123 0.99472759
|
|
0.99647887 1. 0.99823633 0.99647887]
|
|
|
|
mean value: 0.9971886550197643
|
|
|
|
key: test_precision
|
|
value: [0.88571429 0.93939394 0.91176471 0.93939394 0.96875 0.82051282
|
|
0.94117647 0.94117647 0.88888889 0.88888889]
|
|
|
|
mean value: 0.9125660409851586
|
|
|
|
key: train_precision
|
|
value: [0.99649123 0.99649123 0.98954704 0.99649123 0.99300699 0.98951049
|
|
0.99298246 1. 0.99647887 0.99298246]
|
|
|
|
mean value: 0.9943981990575673
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.96875 0.953125 0.96875 0.984375 0.88709677
|
|
0.96774194 0.96774194 0.93548387 0.93548387]
|
|
|
|
mean value: 0.9506048387096775
|
|
|
|
key: train_roc_auc
|
|
value: [0.99823322 0.99823322 0.99469965 0.99823322 0.99646643 0.99471831
|
|
0.99647887 1. 0.99823944 0.99647887]
|
|
|
|
mean value: 0.9971781217339372
|
|
|
|
key: test_jcc
|
|
value: [0.88571429 0.93939394 0.91176471 0.93939394 0.96875 0.82051282
|
|
0.94117647 0.94117647 0.88888889 0.88888889]
|
|
|
|
mean value: 0.9125660409851586
|
|
|
|
key: train_jcc
|
|
value: [0.99649123 0.99649123 0.98954704 0.99649123 0.99300699 0.98951049
|
|
0.99298246 1. 0.99647887 0.99298246]
|
|
|
|
mean value: 0.9943981990575673
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.45
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07831621 0.08840275 0.09410763 0.0900991 0.11436749 0.10805988
|
|
0.10566497 0.10121298 0.11344123 0.09253383]
|
|
|
|
mean value: 0.09862060546875
|
|
|
|
key: score_time
|
|
value: [0.02243733 0.03880763 0.02347946 0.03738046 0.03915358 0.04094911
|
|
0.02115917 0.03472233 0.03503799 0.02665401]
|
|
|
|
mean value: 0.03197810649871826
|
|
|
|
key: test_mcc
|
|
value: [0.93844649 0.96875 0.93844649 0.93844649 0.93844649 0.90873893
|
|
0.96871896 0.93832585 0.96871896 0.85168687]
|
|
|
|
mean value: 0.9358725549341791
|
|
|
|
key: train_mcc
|
|
value: [0.99647883 0.99647883 1. 0.99647883 1. 1.
|
|
1. 1. 0.99647887 1. ]
|
|
|
|
mean value: 0.9985915361756411
|
|
|
|
key: test_accuracy
|
|
value: [0.96825397 0.98412698 0.96825397 0.96825397 0.96825397 0.95238095
|
|
0.98412698 0.96825397 0.98412698 0.92063492]
|
|
|
|
mean value: 0.9666666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.99823633 0.99823633 1. 0.99823633 1. 1.
|
|
1. 1. 0.99823633 1. ]
|
|
|
|
mean value: 0.9992945326278659
|
|
|
|
key: test_fscore
|
|
value: [0.96875 0.98412698 0.96875 0.96875 0.96875 0.95522388
|
|
0.98461538 0.96969697 0.98461538 0.92753623]
|
|
|
|
mean value: 0.9680814835535796
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[0.99824253 0.99824253 1. 0.99824253 1. 1.
|
|
1. 1. 0.99823633 1. ]
|
|
|
|
mean value: 0.9992963923836801
|
|
|
|
key: test_precision
|
|
value: [0.93939394 0.96875 0.93939394 0.93939394 0.93939394 0.91428571
|
|
0.96969697 0.94117647 0.96969697 0.86486486]
|
|
|
|
mean value: 0.9386046746708512
|
|
|
|
key: train_precision
|
|
value: [0.99649123 0.99649123 1. 0.99649123 1. 1.
|
|
1. 1. 0.99647887 1. ]
|
|
|
|
mean value: 0.9985952557449963
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96875 0.984375 0.96875 0.96875 0.96875 0.9516129
|
|
0.98387097 0.96774194 0.98387097 0.91935484]
|
|
|
|
mean value: 0.9665826612903226
|
|
|
|
key: train_roc_auc
|
|
value: [0.99823322 0.99823322 1. 0.99823322 1. 1.
|
|
1. 1. 0.99823944 1. ]
|
|
|
|
mean value: 0.9992939083262828
|
|
|
|
key: test_jcc
|
|
value: [0.93939394 0.96875 0.93939394 0.93939394 0.93939394 0.91428571
|
|
0.96969697 0.94117647 0.96969697 0.86486486]
|
|
|
|
mean value: 0.9386046746708512
|
|
|
|
key: train_jcc
|
|
value: [0.99649123 0.99649123 1. 0.99649123 1. 1.
|
|
1. 1. 0.99647887 1. ]
|
|
|
|
mean value: 0.9985952557449963
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.42
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.23061109 0.23501968 0.23505569 0.24136305 0.29540896 0.25719118
|
|
0.24894857 0.26424384 0.25834155 0.17915821]
|
|
|
|
mean value: 0.24453418254852294
|
|
|
|
key: score_time
|
|
value: [0.02738714 0.02701712 0.02778244 0.02967429 0.0275135 0.02850556
|
|
0.02690411 0.02715778 0.03280067 0.03189731]
|
|
|
|
mean value: 0.028663992881774902
|
|
|
|
key: test_mcc
|
|
value: [0.82507166 0.93844649 0.85238636 0.90900317 0.93844649 0.85168687
|
|
0.90873893 0.85168687 0.93832585 0.93832585]
|
|
|
|
mean value: 0.895211856126571
|
|
|
|
key: train_mcc
|
|
value: [0.96874043 0.96192098 0.972167 0.972167 0.96532513 0.96874387
|
|
0.96192609 0.96532937 0.96532937 0.97216973]
|
|
|
|
mean value: 0.967381895794655
|
|
|
|
key: test_accuracy
|
|
value: [0.9047619 0.96825397 0.92063492 0.95238095 0.96825397 0.92063492
|
|
0.95238095 0.92063492 0.96825397 0.96825397]
|
|
|
|
mean value: 0.9444444444444444
|
|
|
|
key: train_accuracy
|
|
value: [0.98412698 0.98059965 0.98589065 0.98589065 0.98236332 0.98412698
|
|
0.98059965 0.98236332 0.98236332 0.98589065]
|
|
|
|
mean value: 0.9834215167548501
|
|
|
|
key: test_fscore
|
|
value: [0.91176471 0.96875 0.92537313 0.95384615 0.96875 0.92753623
|
|
0.95522388 0.92753623 0.96969697 0.96969697]
|
|
|
|
mean value: 0.9478174277815936
|
|
|
|
key: train_fscore
|
|
value: [0.98440208 0.98100173 0.98611111 0.98611111 0.98269896 0.98434783
|
|
0.98093588 0.98263889 0.98263889 0.98606272]
|
|
|
|
mean value: 0.9836949187849766
|
|
|
|
key: test_precision
|
|
value: [0.83783784 0.93939394 0.86111111 0.91176471 0.93939394 0.86486486
|
|
0.91428571 0.86486486 0.94117647 0.94117647]
|
|
|
|
mean value: 0.9015869918811096
|
|
|
|
key: train_precision
|
|
value: [0.96928328 0.96271186 0.97260274 0.97260274 0.96598639 0.96917808
|
|
0.96258503 0.96587031 0.96587031 0.97250859]
|
|
|
|
mean value: 0.967919933647232
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.90625 0.96875 0.921875 0.953125 0.96875 0.91935484
|
|
0.9516129 0.91935484 0.96774194 0.96774194]
|
|
|
|
mean value: 0.9444556451612903
|
|
|
|
key: train_roc_auc
|
|
value: [0.98409894 0.98056537 0.98586572 0.98586572 0.98233216 0.98415493
|
|
0.9806338 0.98239437 0.98239437 0.98591549]
|
|
|
|
mean value: 0.9834220872940825
|
|
|
|
key: test_jcc
|
|
value: [0.83783784 0.93939394 0.86111111 0.91176471 0.93939394 0.86486486
|
|
0.91428571 0.86486486 0.94117647 0.94117647]
|
|
|
|
mean value: 0.9015869918811096
|
|
|
|
key: train_jcc
|
|
value: [0.96928328 0.96271186 0.97260274 0.97260274 0.96598639 0.96917808
|
|
0.96258503 0.96587031 0.96587031 0.97250859]
|
|
|
|
mean value: 0.967919933647232
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.71508455 0.70400739 0.70438623 0.70540261 0.70530033 0.70265675
|
|
0.70786166 0.70629096 0.70211506 0.70343208]
|
|
|
|
mean value: 0.7056537628173828
|
|
|
|
key: score_time
|
|
value: [0.00990725 0.00918722 0.00916815 0.00947952 0.00983 0.00956821
|
|
0.00931525 0.00925922 0.00939631 0.00930548]
|
|
|
|
mean value: 0.009441661834716796
|
|
|
|
key: test_mcc
|
|
value: [0.85238636 0.93844649 0.88034084 0.90900317 1. 0.87988269
|
|
0.96871896 0.90873893 0.93832585 0.87988269]
|
|
|
|
mean value: 0.9155725990232852
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.92063492 0.96825397 0.93650794 0.95238095 1. 0.93650794
|
|
0.98412698 0.95238095 0.96825397 0.93650794]
|
|
|
|
mean value: 0.9555555555555555
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.92537313 0.96875 0.93939394 0.95384615 1. 0.94117647
|
|
0.98461538 0.95522388 0.96969697 0.94117647]
|
|
|
|
mean value: 0.9579252403654291
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.86111111 0.93939394 0.88571429 0.91176471 1. 0.88888889
|
|
0.96969697 0.91428571 0.94117647 0.88888889]
|
|
|
|
mean value: 0.9200920974450386
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.921875 0.96875 0.9375 0.953125 1. 0.93548387
|
|
0.98387097 0.9516129 0.96774194 0.93548387]
|
|
|
|
mean value: 0.9555443548387097
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.86111111 0.93939394 0.88571429 0.91176471 1. 0.88888889
|
|
0.96969697 0.91428571 0.94117647 0.88888889]
|
|
|
|
mean value: 0.9200920974450386
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.4
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0348866 0.03121352 0.03251052 0.03193569 0.03241062 0.03577042
|
|
0.03243017 0.03188968 0.05515742 0.04671407]
|
|
|
|
mean value: 0.03649187088012695
|
|
|
|
key: score_time
|
|
value: [0.01268077 0.0138905 0.01758075 0.01721144 0.01459551 0.01551962
|
|
0.01452184 0.02729797 0.03374696 0.01482415]
|
|
|
|
mean value: 0.01818695068359375
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.34
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.016325 0.01685786 0.03920698 0.05000663 0.02742696 0.01647878
|
|
0.01650167 0.03942823 0.03394032 0.03964949]
|
|
|
|
mean value: 0.02958219051361084
|
|
|
|
key: score_time
|
|
value: [0.02931905 0.01375628 0.0186646 0.02551079 0.02132893 0.01211834
|
|
0.01204419 0.01876664 0.0186851 0.01884413]
|
|
|
|
mean value: 0.01890380382537842
|
|
|
|
key: test_mcc
|
|
value: [0.77211 0.93844649 0.88034084 0.82507166 0.8415746 0.82408564
|
|
0.90873893 0.82408564 0.87988269 0.82408564]
|
|
|
|
mean value: 0.8518422141790842
|
|
|
|
key: train_mcc
|
|
value: [0.9218992 0.89600606 0.90564657 0.91536977 0.88377651 0.91539508
|
|
0.90890766 0.90245767 0.90890766 0.91539508]
|
|
|
|
mean value: 0.9073761245158879
|
|
|
|
key: test_accuracy
|
|
value: [0.87301587 0.96825397 0.93650794 0.9047619 0.92063492 0.9047619
|
|
0.95238095 0.9047619 0.93650794 0.9047619 ]
|
|
|
|
mean value: 0.9206349206349206
|
|
|
|
key: train_accuracy
|
|
value: [0.95943563 0.94532628 0.95061728 0.95590829 0.94003527 0.95590829
|
|
0.95238095 0.94885362 0.95238095 0.95590829]
|
|
|
|
mean value: 0.9516754850088183
|
|
|
|
key: test_fscore
|
|
value: [0.88571429 0.96875 0.93939394 0.91176471 0.91803279 0.91428571
|
|
0.95522388 0.91428571 0.94117647 0.91428571]
|
|
|
|
mean value: 0.9262913211918217
|
|
|
|
key: train_fscore
|
|
value: [0.96108291 0.94824708 0.95302013 0.95784148 0.94276094 0.95769882
|
|
0.9544688 0.9512605 0.9544688 0.95769882]
|
|
|
|
mean value: 0.9538548290486133
|
|
|
|
key: test_precision
|
|
value: [0.79487179 0.93939394 0.88571429 0.83783784 0.93333333 0.84210526
|
|
0.91428571 0.84210526 0.88888889 0.84210526]
|
|
|
|
mean value: 0.8720641583799478
|
|
|
|
key: train_precision
|
|
value: [0.92508143 0.9015873 0.91025641 0.91909385 0.90322581 0.91883117
|
|
0.91290323 0.90705128 0.91290323 0.91883117]
|
|
|
|
mean value: 0.9129764873979289
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.90322581 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9903225806451613
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 0.98591549 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9985915492957746
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.96875 0.9375 0.90625 0.9203629 0.90322581
|
|
0.9516129 0.90322581 0.93548387 0.90322581]
|
|
|
|
mean value: 0.9204637096774193
|
|
|
|
key: train_roc_auc
|
|
value: [0.95936396 0.94522968 0.95053004 0.95583039 0.93995421 0.95598592
|
|
0.95246479 0.94894366 0.95246479 0.95598592]
|
|
|
|
mean value: 0.9516753346936744
|
|
|
|
key: test_jcc
|
|
value: [0.79487179 0.93939394 0.88571429 0.83783784 0.84848485 0.84210526
|
|
0.91428571 0.84210526 0.88888889 0.84210526]
|
|
|
|
mean value: 0.8635793098950993
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_rt.py:155: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_rt.py:158: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
|
|
key: train_jcc
|
|
value: [0.92508143 0.9015873 0.91025641 0.91909385 0.89171975 0.91883117
|
|
0.91290323 0.90705128 0.91290323 0.91883117]
|
|
|
|
mean value: 0.9118258812750606
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.28792405 0.29805946 0.29593897 0.38518167 0.30680323 0.32595825
|
|
0.30477858 0.30297041 0.29878044 0.29801202]
|
|
|
|
mean value: 0.31044070720672606
|
|
|
|
key: score_time
|
|
value: [0.01887178 0.01874852 0.01868033 0.01886344 0.01873064 0.01926589
|
|
0.01881599 0.01879597 0.02321172 0.01873493]
|
|
|
|
mean value: 0.01927192211151123
|
|
|
|
key: test_mcc
|
|
value: [0.77211 0.93844649 0.90900317 0.88034084 0.8415746 0.82408564
|
|
0.8415746 0.82408564 0.85168687 0.82408564]
|
|
|
|
mean value: 0.8506993498561127
|
|
|
|
key: train_mcc
|
|
value: [0.9218992 0.89600606 0.88011503 0.88644555 0.87082706 0.91539508
|
|
0.86700465 0.90245767 0.89285111 0.91539508]
|
|
|
|
mean value: 0.8948396485010702
|
|
|
|
key: test_accuracy
|
|
value: [0.87301587 0.96825397 0.95238095 0.93650794 0.92063492 0.9047619
|
|
0.92063492 0.9047619 0.92063492 0.9047619 ]
|
|
|
|
mean value: 0.9206349206349206
|
|
|
|
key: train_accuracy
|
|
value: [0.95943563 0.94532628 0.93650794 0.94003527 0.9329806 0.95590829
|
|
0.93121693 0.94885362 0.94356261 0.95590829]
|
|
|
|
mean value: 0.9449735449735449
|
|
|
|
key: test_fscore
|
|
value: [0.88571429 0.96875 0.95384615 0.93939394 0.91803279 0.91428571
|
|
0.92307692 0.91428571 0.92753623 0.91428571]
|
|
|
|
mean value: 0.9259207463657748
|
|
|
|
key: train_fscore
|
|
value: [0.96108291 0.94824708 0.94039735 0.94352159 0.93645485 0.95769882
|
|
0.93445378 0.9512605 0.94648829 0.95769882]
|
|
|
|
mean value: 0.9477303995124026
|
|
|
|
key: test_precision
|
|
value: [0.79487179 0.93939394 0.91176471 0.88571429 0.93333333 0.84210526
|
|
0.90909091 0.84210526 0.86486486 0.84210526]
|
|
|
|
mean value: 0.8765349622625165
|
|
|
|
key: train_precision
|
|
value: [0.92508143 0.9015873 0.8875 0.89308176 0.89171975 0.91883117
|
|
0.89102564 0.90705128 0.8984127 0.91883117]
|
|
|
|
mean value: 0.9033122200193235
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.90322581 1.
|
|
0.9375 1. 1. 1. ]
|
|
|
|
mean value: 0.9840725806451613
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 0.98591549 1.
|
|
0.98233216 1. 1. 1. ]
|
|
|
|
mean value: 0.9968247648434778
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.96875 0.953125 0.9375 0.9203629 0.90322581
|
|
0.9203629 0.90322581 0.91935484 0.90322581]
|
|
|
|
mean value: 0.9204133064516129
|
|
|
|
key: train_roc_auc
|
|
value: [0.95936396 0.94522968 0.93639576 0.93992933 0.93288708 0.95598592
|
|
0.93130692 0.94894366 0.94366197 0.95598592]
|
|
|
|
mean value: 0.9449690190613647
|
|
|
|
key: test_jcc
|
|
value: [0.79487179 0.93939394 0.91176471 0.88571429 0.84848485 0.84210526
|
|
0.85714286 0.84210526 0.86486486 0.84210526]
|
|
|
|
mean value: 0.8628553085828627
|
|
|
|
key: train_jcc
|
|
value: [0.92508143 0.9015873 0.8875 0.89308176 0.88050314 0.91883117
|
|
0.87697161 0.90705128 0.8984127 0.91883117]
|
|
|
|
mean value: 0.9007851567431561
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.53
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02203321 0.02207637 0.02978015 0.02458477 0.02242613 0.02654362
|
|
0.02191043 0.0230968 0.02257419 0.02493763]
|
|
|
|
mean value: 0.02399632930755615
|
|
|
|
key: score_time
|
|
value: [0.01149082 0.01155376 0.01170182 0.01157141 0.01151323 0.01149535
|
|
0.0114727 0.01154852 0.01159859 0.01157212]
|
|
|
|
mean value: 0.011551833152770996
|
|
|
|
key: test_mcc
|
|
value: [0.41666667 0.41666667 0.75 0.54772256 0.47140452 0.47140452
|
|
0.73029674 0.16666667 0.73029674 0.35355339]
|
|
|
|
mean value: 0.5054678476360946
|
|
|
|
key: train_mcc
|
|
value: [0.85168687 0.87462485 0.84484323 0.93649194 0.87462485 0.90524194
|
|
0.84530217 0.87487431 0.81644514 0.96875 ]
|
|
|
|
mean value: 0.8792885296628369
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.71428571 0.85714286 0.71428571 0.71428571 0.71428571
|
|
0.85714286 0.57142857 0.85714286 0.57142857]
|
|
|
|
mean value: 0.7285714285714285
|
|
|
|
key: train_accuracy
|
|
value: [0.92063492 0.93650794 0.92063492 0.96825397 0.93650794 0.95238095
|
|
0.92063492 0.93650794 0.9047619 0.98412698]
|
|
|
|
mean value: 0.9380952380952381
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.66666667 0.85714286 0.75 0.5 0.8
|
|
0.88888889 0.57142857 0.88888889 0.4 ]
|
|
|
|
mean value: 0.698968253968254
|
|
|
|
key: train_fscore
|
|
value: [0.92753623 0.93939394 0.92537313 0.96875 0.93939394 0.95238095
|
|
0.92307692 0.9375 0.90909091 0.98412698]
|
|
|
|
mean value: 0.9406623013676063
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.75 0.6 1. 0.66666667
|
|
0.8 0.66666667 0.8 1. ]
|
|
|
|
mean value: 0.7616666666666667
|
|
|
|
key: train_precision
|
|
value: [0.86486486 0.91176471 0.88571429 0.96875 0.91176471 0.9375
|
|
0.88235294 0.90909091 0.85714286 0.96875 ]
|
|
|
|
mean value: 0.9097695269754094
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 1. 1. 0.33333333 1.
|
|
1. 0.5 1. 0.25 ]
|
|
|
|
mean value: 0.7416666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 0.96875 0.96875 0.96875 0.96875 0.96774194
|
|
0.96774194 0.96774194 0.96774194 1. ]
|
|
|
|
mean value: 0.9745967741935484
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.70833333 0.875 0.75 0.66666667 0.66666667
|
|
0.83333333 0.58333333 0.83333333 0.625 ]
|
|
|
|
mean value: 0.725
|
|
|
|
key: train_roc_auc
|
|
value: [0.91935484 0.9359879 0.91985887 0.96824597 0.9359879 0.95262097
|
|
0.92137097 0.93699597 0.90574597 0.984375 ]
|
|
|
|
mean value: 0.938054435483871
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.5 0.75 0.6 0.33333333 0.66666667
|
|
0.8 0.4 0.8 0.25 ]
|
|
|
|
mean value: 0.56
|
|
|
|
key: train_jcc
|
|
value: [0.86486486 0.88571429 0.86111111 0.93939394 0.88571429 0.90909091
|
|
0.85714286 0.88235294 0.83333333 0.96875 ]
|
|
|
|
mean value: 0.8887468527542057
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.53658152 0.55067277 0.64655375 0.54629493 0.58255625 0.53773832
|
|
0.65930247 0.52584004 0.58925724 0.63752484]
|
|
|
|
mean value: 0.581232213973999
|
|
|
|
key: score_time
|
|
value: [0.01170802 0.01178622 0.01169777 0.01187515 0.01168346 0.01403332
|
|
0.01179934 0.01174402 0.01176906 0.01210141]
|
|
|
|
mean value: 0.012019777297973632
|
|
|
|
key: test_mcc
|
|
value: [ 0.41666667 0.41666667 0.75 0.54772256 0.47140452 -0.35355339
|
|
0.73029674 -0.16666667 0.73029674 0.35355339]
|
|
|
|
mean value: 0.38963872316433074
|
|
|
|
key: train_mcc
|
|
value: [0.78094752 0.75156646 0.72270545 0.68352185 0.72270545 1.
|
|
0.75254943 0.81644514 0.78822824 0.84530217]
|
|
|
|
mean value: 0.7863971714606052
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.71428571 0.85714286 0.71428571 0.71428571 0.42857143
|
|
0.85714286 0.42857143 0.85714286 0.57142857]
|
|
|
|
mean value: 0.6857142857142857
|
|
|
|
key: train_accuracy
|
|
value: [0.88888889 0.87301587 0.85714286 0.84126984 0.85714286 1.
|
|
0.87301587 0.9047619 0.88888889 0.92063492]
|
|
|
|
mean value: 0.8904761904761904
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.66666667 0.85714286 0.75 0.5 0.6
|
|
0.88888889 0.5 0.88888889 0.4 ]
|
|
|
|
mean value: 0.6718253968253968
|
|
|
|
key: train_fscore
|
|
value: [0.89552239 0.88235294 0.86956522 0.84848485 0.86956522 1.
|
|
0.87878788 0.90909091 0.89552239 0.92307692]
|
|
|
|
mean value: 0.8971968711519042
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.75 0.6 1. 0.5
|
|
0.8 0.5 0.8 1. ]
|
|
|
|
mean value: 0.7283333333333333
|
|
|
|
key: train_precision
|
|
value: [0.85714286 0.83333333 0.81081081 0.82352941 0.81081081 1.
|
|
0.82857143 0.85714286 0.83333333 0.88235294]
|
|
|
|
mean value: 0.8537027784086608
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 1. 1. 0.33333333 0.75
|
|
1. 0.5 1. 0.25 ]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_recall
|
|
value: [0.9375 0.9375 0.9375 0.875 0.9375 1.
|
|
0.93548387 0.96774194 0.96774194 0.96774194]
|
|
|
|
mean value: 0.9463709677419355
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.70833333 0.875 0.75 0.66666667 0.375
|
|
0.83333333 0.41666667 0.83333333 0.625 ]
|
|
|
|
mean value: 0.6791666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.88810484 0.87197581 0.85584677 0.84072581 0.85584677 1.
|
|
0.87399194 0.90574597 0.89012097 0.92137097]
|
|
|
|
mean value: 0.8903729838709677
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.5 0.75 0.6 0.33333333 0.42857143
|
|
0.8 0.33333333 0.8 0.25 ]
|
|
|
|
mean value: 0.5295238095238095
|
|
|
|
key: train_jcc
|
|
value: [0.81081081 0.78947368 0.76923077 0.73684211 0.76923077 1.
|
|
0.78378378 0.83333333 0.81081081 0.85714286]
|
|
|
|
mean value: 0.8160658923816819
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01239443 0.0114634 0.01012373 0.00828505 0.00877762 0.00855231
|
|
0.00827384 0.00841808 0.0091958 0.00830746]
|
|
|
|
mean value: 0.009379172325134277
|
|
|
|
key: score_time
|
|
value: [0.01172256 0.01489496 0.01005602 0.00921273 0.00919962 0.00918698
|
|
0.00856519 0.008425 0.00845075 0.00839353]
|
|
|
|
mean value: 0.009810733795166015
|
|
|
|
key: test_mcc
|
|
value: [ 0.75 0.75 0.75 -0.16666667 0.75 0.09128709
|
|
0.73029674 0.73029674 0.73029674 0.54772256]
|
|
|
|
mean value: 0.5663233213776692
|
|
|
|
key: train_mcc
|
|
value: [0.77042092 0.7591889 0.74424094 0.82507166 0.77042092 0.77211
|
|
0.74634526 0.77211 0.74634526 0.82507166]
|
|
|
|
mean value: 0.7731325501413768
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.85714286 0.85714286 0.42857143 0.85714286 0.57142857
|
|
0.85714286 0.85714286 0.85714286 0.71428571]
|
|
|
|
mean value: 0.7714285714285714
|
|
|
|
key: train_accuracy
|
|
value: [0.87301587 0.87301587 0.85714286 0.9047619 0.87301587 0.87301587
|
|
0.85714286 0.87301587 0.85714286 0.9047619 ]
|
|
|
|
mean value: 0.8746031746031746
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.85714286 0.85714286 0.33333333 0.85714286 0.66666667
|
|
0.88888889 0.88888889 0.88888889 0.66666667]
|
|
|
|
mean value: 0.7761904761904762
|
|
|
|
key: train_fscore
|
|
value: [0.88888889 0.88571429 0.87671233 0.89655172 0.88888889 0.88571429
|
|
0.87323944 0.88571429 0.87323944 0.91176471]
|
|
|
|
mean value: 0.8866428266947479
|
|
|
|
key: test_precision
|
|
value: [0.75 0.75 0.75 0.33333333 0.75 0.6
|
|
0.8 0.8 0.8 1. ]
|
|
|
|
mean value: 0.7333333333333334
|
|
|
|
key: train_precision
|
|
value: [0.8 0.81578947 0.7804878 1. 0.8 0.79487179
|
|
0.775 0.79487179 0.775 0.83783784]
|
|
|
|
mean value: 0.8173858706143687
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 0.33333333 1. 0.75
|
|
1. 1. 1. 0.5 ]
|
|
|
|
mean value: 0.8583333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 0.96875 1. 0.8125 1. 1. 1. 1. 1.
|
|
1. ]
|
|
|
|
mean value: 0.978125
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.875 0.875 0.41666667 0.875 0.54166667
|
|
0.83333333 0.83333333 0.83333333 0.75 ]
|
|
|
|
mean value: 0.7708333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.87096774 0.87147177 0.85483871 0.90625 0.87096774 0.875
|
|
0.859375 0.875 0.859375 0.90625 ]
|
|
|
|
mean value: 0.8749495967741936
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.75 0.75 0.2 0.75 0.5 0.8 0.8 0.8 0.5 ]
|
|
|
|
mean value: 0.66
|
|
|
|
key: train_jcc
|
|
value: [0.8 0.79487179 0.7804878 0.8125 0.8 0.79487179
|
|
0.775 0.79487179 0.775 0.83783784]
|
|
|
|
mean value: 0.7965441027331271
|
|
|
|
MCC on Blind test: 0.54
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00904942 0.00836277 0.00843406 0.00832176 0.00934124 0.00865722
|
|
0.00957727 0.00829864 0.00854516 0.00933099]
|
|
|
|
mean value: 0.008791851997375488
|
|
|
|
key: score_time
|
|
value: [0.00915694 0.00888419 0.00845146 0.00843883 0.00910306 0.0091691
|
|
0.00931549 0.00887728 0.00840974 0.00922036]
|
|
|
|
mean value: 0.008902645111083985
|
|
|
|
key: test_mcc
|
|
value: [ 0.16666667 0.41666667 -0.16666667 -0.16666667 0.41666667 0.41666667
|
|
0.41666667 0.16666667 0.75 0.35355339]
|
|
|
|
mean value: 0.27702200572599406
|
|
|
|
key: train_mcc
|
|
value: [0.61982085 0.65120968 0.61895161 0.71471774 0.59049817 0.71471774
|
|
0.65085805 0.5892604 0.55909213 0.68865372]
|
|
|
|
mean value: 0.6397780094314123
|
|
|
|
key: test_accuracy
|
|
value: [0.57142857 0.71428571 0.42857143 0.42857143 0.71428571 0.71428571
|
|
0.71428571 0.57142857 0.85714286 0.57142857]
|
|
|
|
mean value: 0.6285714285714286
|
|
|
|
key: train_accuracy
|
|
value: [0.80952381 0.82539683 0.80952381 0.85714286 0.79365079 0.85714286
|
|
0.82539683 0.79365079 0.77777778 0.84126984]
|
|
|
|
mean value: 0.819047619047619
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.66666667 0.33333333 0.33333333 0.66666667 0.75
|
|
0.75 0.57142857 0.85714286 0.4 ]
|
|
|
|
mean value: 0.59
|
|
|
|
key: train_fscore
|
|
value: [0.81818182 0.82539683 0.8125 0.85714286 0.78688525 0.85714286
|
|
0.81967213 0.77966102 0.75862069 0.84848485]
|
|
|
|
mean value: 0.8163688290002712
|
|
|
|
key: test_precision
|
|
value: [0.5 0.66666667 0.33333333 0.33333333 0.66666667 0.75
|
|
0.75 0.66666667 1. 1. ]
|
|
|
|
mean value: 0.6666666666666666
|
|
|
|
key: train_precision
|
|
value: [0.79411765 0.83870968 0.8125 0.87096774 0.82758621 0.84375
|
|
0.83333333 0.82142857 0.81481481 0.8 ]
|
|
|
|
mean value: 0.8257207992886934
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 0.33333333 0.33333333 0.66666667 0.75
|
|
0.75 0.5 0.75 0.25 ]
|
|
|
|
mean value: 0.5666666666666667
|
|
|
|
key: train_recall
|
|
value: [0.84375 0.8125 0.8125 0.84375 0.75 0.87096774
|
|
0.80645161 0.74193548 0.70967742 0.90322581]
|
|
|
|
mean value: 0.8094758064516129
|
|
|
|
key: test_roc_auc
|
|
value: [0.58333333 0.70833333 0.41666667 0.41666667 0.70833333 0.70833333
|
|
0.70833333 0.58333333 0.875 0.625 ]
|
|
|
|
mean value: 0.6333333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.80897177 0.82560484 0.80947581 0.85735887 0.79435484 0.85735887
|
|
0.82510081 0.79284274 0.77671371 0.8422379 ]
|
|
|
|
mean value: 0.8190020161290322
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.5 0.2 0.2 0.5 0.6 0.6 0.4 0.75 0.25]
|
|
|
|
mean value: 0.44
|
|
|
|
key: train_jcc
|
|
value: [0.69230769 0.7027027 0.68421053 0.75 0.64864865 0.75
|
|
0.69444444 0.63888889 0.61111111 0.73684211]
|
|
|
|
mean value: 0.6909156119682436
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00852013 0.0091095 0.00907636 0.00912786 0.00904131 0.009058
|
|
0.00915551 0.00931859 0.00912452 0.00911093]
|
|
|
|
mean value: 0.009064269065856934
|
|
|
|
key: score_time
|
|
value: [0.00915408 0.01025391 0.01018119 0.00987482 0.0098784 0.01004815
|
|
0.00996161 0.01014972 0.00984311 0.00998163]
|
|
|
|
mean value: 0.009932661056518554
|
|
|
|
key: test_mcc
|
|
value: [ 0.54772256 0.41666667 0.75 0.41666667 -0.47140452 0.09128709
|
|
-0.09128709 -0.41666667 0.47140452 0.54772256]
|
|
|
|
mean value: 0.2262111781676999
|
|
|
|
key: train_mcc
|
|
value: [0.62325024 0.61982085 0.65315611 0.52419355 0.44068914 0.68865372
|
|
0.49493401 0.49193548 0.37363667 0.58778119]
|
|
|
|
mean value: 0.5498050968683705
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.71428571 0.85714286 0.71428571 0.28571429 0.57142857
|
|
0.42857143 0.28571429 0.71428571 0.71428571]
|
|
|
|
mean value: 0.6
|
|
|
|
key: train_accuracy
|
|
value: [0.80952381 0.80952381 0.82539683 0.76190476 0.71428571 0.84126984
|
|
0.74603175 0.74603175 0.68253968 0.77777778]
|
|
|
|
mean value: 0.7714285714285715
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.66666667 0.85714286 0.66666667 0.44444444 0.66666667
|
|
0.33333333 0.28571429 0.8 0.66666667]
|
|
|
|
mean value: 0.6137301587301587
|
|
|
|
key: train_fscore
|
|
value: [0.82352941 0.81818182 0.8358209 0.76190476 0.75 0.84848485
|
|
0.72413793 0.74193548 0.70588235 0.80555556]
|
|
|
|
mean value: 0.7815433059260706
|
|
|
|
key: test_precision
|
|
value: [0.6 0.66666667 0.75 0.66666667 0.33333333 0.6
|
|
0.5 0.33333333 0.66666667 1. ]
|
|
|
|
mean value: 0.6116666666666667
|
|
|
|
key: train_precision
|
|
value: [0.77777778 0.79411765 0.8 0.77419355 0.675 0.8
|
|
0.77777778 0.74193548 0.64864865 0.70731707]
|
|
|
|
mean value: 0.7496767956691824
|
|
|
|
key: test_recall
|
|
value: [1. 0.66666667 1. 0.66666667 0.66666667 0.75
|
|
0.25 0.25 1. 0.5 ]
|
|
|
|
mean value: 0.675
|
|
|
|
key: train_recall
|
|
value: [0.875 0.84375 0.875 0.75 0.84375 0.90322581
|
|
0.67741935 0.74193548 0.77419355 0.93548387]
|
|
|
|
mean value: 0.821975806451613
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.70833333 0.875 0.70833333 0.33333333 0.54166667
|
|
0.45833333 0.29166667 0.66666667 0.75 ]
|
|
|
|
mean value: 0.6083333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [0.80846774 0.80897177 0.82459677 0.76209677 0.71219758 0.8422379
|
|
0.74495968 0.74596774 0.68397177 0.78024194]
|
|
|
|
mean value: 0.7713709677419355
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.5 0.75 0.5 0.28571429 0.5
|
|
0.2 0.16666667 0.66666667 0.5 ]
|
|
|
|
mean value: 0.4669047619047619
|
|
|
|
key: train_jcc
|
|
value: [0.7 0.69230769 0.71794872 0.61538462 0.6 0.73684211
|
|
0.56756757 0.58974359 0.54545455 0.6744186 ]
|
|
|
|
mean value: 0.6439667438321048
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01005101 0.00997472 0.0098536 0.00920486 0.00935388 0.00955009
|
|
0.00955963 0.00965333 0.00864577 0.00946832]
|
|
|
|
mean value: 0.009531521797180175
|
|
|
|
key: score_time
|
|
value: [0.00942826 0.00945902 0.00927925 0.00936508 0.00913 0.00872564
|
|
0.00948262 0.00879908 0.00889134 0.00923276]
|
|
|
|
mean value: 0.009179306030273438
|
|
|
|
key: test_mcc
|
|
value: [ 0.75 0.41666667 0.75 0.54772256 0.47140452 0.47140452
|
|
0.41666667 -0.54772256 1. 0.35355339]
|
|
|
|
mean value: 0.46296957655086707
|
|
|
|
key: train_mcc
|
|
value: [0.84484323 0.81092385 0.81092385 0.77822581 0.81572458 0.84173387
|
|
0.80947581 0.84530217 0.80947581 0.84530217]
|
|
|
|
mean value: 0.8211931140418118
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.71428571 0.85714286 0.71428571 0.71428571 0.71428571
|
|
0.71428571 0.28571429 1. 0.57142857]
|
|
|
|
mean value: 0.7142857142857143
|
|
|
|
key: train_accuracy
|
|
value: [0.92063492 0.9047619 0.9047619 0.88888889 0.9047619 0.92063492
|
|
0.9047619 0.92063492 0.9047619 0.92063492]
|
|
|
|
mean value: 0.9095238095238095
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.66666667 0.85714286 0.75 0.5 0.8
|
|
0.75 0.44444444 1. 0.4 ]
|
|
|
|
mean value: 0.7025396825396826
|
|
|
|
key: train_fscore
|
|
value: [0.92537313 0.90909091 0.90909091 0.88888889 0.91176471 0.92063492
|
|
0.90322581 0.92307692 0.90322581 0.92307692]
|
|
|
|
mean value: 0.911744892697341
|
|
|
|
key: test_precision
|
|
value: [0.75 0.66666667 0.75 0.6 1. 0.66666667
|
|
0.75 0.4 1. 1. ]
|
|
|
|
mean value: 0.7583333333333333
|
|
|
|
key: train_precision
|
|
value: [0.88571429 0.88235294 0.88235294 0.90322581 0.86111111 0.90625
|
|
0.90322581 0.88235294 0.90322581 0.88235294]
|
|
|
|
mean value: 0.8892164580886117
|
|
|
|
key: test_recall
|
|
value: [1. 0.66666667 1. 1. 0.33333333 1.
|
|
0.75 0.5 1. 0.25 ]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_recall
|
|
value: [0.96875 0.9375 0.9375 0.875 0.96875 0.93548387
|
|
0.90322581 0.96774194 0.90322581 0.96774194]
|
|
|
|
mean value: 0.936491935483871
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.70833333 0.875 0.75 0.66666667 0.66666667
|
|
0.70833333 0.25 1. 0.625 ]
|
|
|
|
mean value: 0.7125
|
|
|
|
key: train_roc_auc
|
|
value: [0.91985887 0.90423387 0.90423387 0.8891129 0.90372984 0.92086694
|
|
0.9047379 0.92137097 0.9047379 0.92137097]
|
|
|
|
mean value: 0.9094254032258065
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.5 0.75 0.6 0.33333333 0.66666667
|
|
0.6 0.28571429 1. 0.25 ]
|
|
|
|
mean value: 0.5735714285714285
|
|
|
|
key: train_jcc
|
|
value: [0.86111111 0.83333333 0.83333333 0.8 0.83783784 0.85294118
|
|
0.82352941 0.85714286 0.82352941 0.85714286]
|
|
|
|
mean value: 0.837990132990133
|
|
|
|
MCC on Blind test: 0.43
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.60025525 0.33146954 0.31887197 0.3127799 0.33535314 0.33147788
|
|
0.51483893 0.32325983 0.3623457 0.30383468]
|
|
|
|
mean value: 0.3734486818313599
|
|
|
|
key: score_time
|
|
value: [0.01195884 0.01195192 0.01197982 0.01200247 0.01194859 0.01192069
|
|
0.01204634 0.01197028 0.01195526 0.01195955]
|
|
|
|
mean value: 0.011969375610351562
|
|
|
|
key: test_mcc
|
|
value: [ 0.41666667 0.41666667 0.75 0.54772256 0.47140452 0.09128709
|
|
0.16666667 -0.16666667 1. 0.35355339]
|
|
|
|
mean value: 0.4047300895140333
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.71428571 0.85714286 0.71428571 0.71428571 0.57142857
|
|
0.57142857 0.42857143 1. 0.57142857]
|
|
|
|
mean value: 0.6857142857142857
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.66666667 0.85714286 0.75 0.5 0.66666667
|
|
0.57142857 0.5 1. 0.4 ]
|
|
|
|
mean value: 0.6578571428571428
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.75 0.6 1. 0.6
|
|
0.66666667 0.5 1. 1. ]
|
|
|
|
mean value: 0.745
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 1. 1. 0.33333333 0.75
|
|
0.5 0.5 1. 0.25 ]
|
|
|
|
mean value: 0.6666666666666666
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.70833333 0.875 0.75 0.66666667 0.54166667
|
|
0.58333333 0.41666667 1. 0.625 ]
|
|
|
|
mean value: 0.6875
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.5 0.75 0.6 0.33333333 0.5
|
|
0.4 0.33333333 1. 0.25 ]
|
|
|
|
mean value: 0.5166666666666666
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0139122 0.01349163 0.01157761 0.01099682 0.0113306 0.01069665
|
|
0.0109818 0.01038527 0.01077366 0.0101757 ]
|
|
|
|
mean value: 0.011432194709777832
|
|
|
|
key: score_time
|
|
value: [0.01145387 0.00903749 0.00967026 0.00893188 0.00859809 0.008883
|
|
0.00919175 0.00903225 0.00866652 0.00841498]
|
|
|
|
mean value: 0.009188008308410645
|
|
|
|
key: test_mcc
|
|
value: [ 0.41666667 0.75 0.75 0.54772256 1. -0.35355339
|
|
0.73029674 0.09128709 0.54772256 0.41666667]
|
|
|
|
mean value: 0.4896808894008141
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.85714286 0.85714286 0.71428571 1. 0.42857143
|
|
0.85714286 0.57142857 0.71428571 0.71428571]
|
|
|
|
mean value: 0.7428571428571429
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.85714286 0.85714286 0.75 1. 0.6
|
|
0.88888889 0.66666667 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7703174603174603
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.75 0.75 0.6 1. 0.5
|
|
0.8 0.6 1. 0.75 ]
|
|
|
|
mean value: 0.7416666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 1. 1. 1. 0.75
|
|
1. 0.75 0.5 0.75 ]
|
|
|
|
mean value: 0.8416666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.875 0.875 0.75 1. 0.375
|
|
0.83333333 0.54166667 0.75 0.70833333]
|
|
|
|
mean value: 0.7416666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.75 0.75 0.6 1. 0.42857143
|
|
0.8 0.5 0.5 0.6 ]
|
|
|
|
mean value: 0.6428571428571429
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.4
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08173561 0.07998013 0.08446407 0.08183193 0.08275533 0.08011079
|
|
0.0801332 0.07886791 0.07900286 0.07894683]
|
|
|
|
mean value: 0.08078286647796631
|
|
|
|
key: score_time
|
|
value: [0.01817846 0.016891 0.01702094 0.01818109 0.01853681 0.01737142
|
|
0.0170095 0.01700211 0.01693463 0.01700997]
|
|
|
|
mean value: 0.01741359233856201
|
|
|
|
key: test_mcc
|
|
value: [ 0.09128709 0.73029674 0.75 0.54772256 0.47140452 0.47140452
|
|
0.41666667 -0.16666667 0.47140452 0.35355339]
|
|
|
|
mean value: 0.4137073346729284
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.57142857 0.85714286 0.85714286 0.71428571 0.71428571 0.71428571
|
|
0.71428571 0.42857143 0.71428571 0.57142857]
|
|
|
|
mean value: 0.6857142857142857
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.8 0.85714286 0.75 0.5 0.8
|
|
0.75 0.5 0.8 0.4 ]
|
|
|
|
mean value: 0.6557142857142857
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 1. 0.75 0.6 1. 0.66666667
|
|
0.75 0.5 0.66666667 1. ]
|
|
|
|
mean value: 0.7433333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.66666667 1. 1. 0.33333333 1.
|
|
0.75 0.5 1. 0.25 ]
|
|
|
|
mean value: 0.6833333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.54166667 0.83333333 0.875 0.75 0.66666667 0.66666667
|
|
0.70833333 0.41666667 0.66666667 0.625 ]
|
|
|
|
mean value: 0.675
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.66666667 0.75 0.6 0.33333333 0.66666667
|
|
0.6 0.33333333 0.66666667 0.25 ]
|
|
|
|
mean value: 0.5116666666666666
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.008461 0.00844383 0.00848389 0.00854969 0.00848627 0.00898242
|
|
0.00867033 0.00935841 0.00946116 0.00849795]
|
|
|
|
mean value: 0.008739495277404785
|
|
|
|
key: score_time
|
|
value: [0.00848055 0.00844765 0.00844049 0.00844431 0.00842762 0.00848174
|
|
0.00909472 0.00878549 0.00903988 0.00923586]
|
|
|
|
mean value: 0.008687829971313477
|
|
|
|
key: test_mcc
|
|
value: [-0.41666667 0.16666667 -0.41666667 -0.09128709 0.47140452 0.09128709
|
|
0.41666667 -0.41666667 0.41666667 0.54772256]
|
|
|
|
mean value: 0.07691270782961977
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.28571429 0.57142857 0.28571429 0.42857143 0.71428571 0.57142857
|
|
0.71428571 0.28571429 0.71428571 0.71428571]
|
|
|
|
mean value: 0.5285714285714286
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.28571429 0.57142857 0.28571429 0.5 0.5 0.66666667
|
|
0.75 0.28571429 0.75 0.66666667]
|
|
|
|
mean value: 0.5261904761904762
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.25 0.5 0.25 0.4 1. 0.6
|
|
0.75 0.33333333 0.75 1. ]
|
|
|
|
mean value: 0.5833333333333334
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.66666667 0.33333333 0.66666667 0.33333333 0.75
|
|
0.75 0.25 0.75 0.5 ]
|
|
|
|
mean value: 0.5333333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.29166667 0.58333333 0.29166667 0.45833333 0.66666667 0.54166667
|
|
0.70833333 0.29166667 0.70833333 0.75 ]
|
|
|
|
mean value: 0.5291666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.16666667 0.4 0.16666667 0.33333333 0.33333333 0.5
|
|
0.6 0.16666667 0.6 0.5 ]
|
|
|
|
mean value: 0.37666666666666665
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.03189254 1.02414799 0.99355721 0.99319196 1.02459288 1.07812452
|
|
1.06691289 1.05521536 1.00622463 0.98338842]
|
|
|
|
mean value: 1.0257248401641845
|
|
|
|
key: score_time
|
|
value: [0.09274769 0.0938468 0.08658433 0.09374738 0.0929749 0.09373045
|
|
0.09458351 0.09412074 0.08634567 0.09109592]
|
|
|
|
mean value: 0.09197773933410644
|
|
|
|
key: test_mcc
|
|
value: [ 0.09128709 0.73029674 0.75 0.54772256 0.09128709 0.09128709
|
|
0.16666667 -0.54772256 0.41666667 0.35355339]
|
|
|
|
mean value: 0.26910447460194115
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.57142857 0.85714286 0.85714286 0.71428571 0.57142857 0.57142857
|
|
0.57142857 0.28571429 0.71428571 0.57142857]
|
|
|
|
mean value: 0.6285714285714286
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.8 0.85714286 0.75 0.4 0.66666667
|
|
0.57142857 0.44444444 0.75 0.4 ]
|
|
|
|
mean value: 0.6039682539682539
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 1. 0.75 0.6 0.5 0.6
|
|
0.66666667 0.4 0.75 1. ]
|
|
|
|
mean value: 0.6766666666666666
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.33333333 0.66666667 1. 1. 0.33333333 0.75
|
|
0.5 0.5 0.75 0.25 ]
|
|
|
|
mean value: 0.6083333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.54166667 0.83333333 0.875 0.75 0.54166667 0.54166667
|
|
0.58333333 0.25 0.70833333 0.625 ]
|
|
|
|
mean value: 0.625
|
|
|
|
key: train_roc_auc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.66666667 0.75 0.6 0.25 0.5
|
|
0.4 0.28571429 0.6 0.25 ]
|
|
|
|
mean value: 0.4552380952380952
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.29
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.85591006 0.8479023 0.83991838 0.86907196 0.83147216 0.82246804
|
|
0.86206722 0.92785144 0.79704881 0.87628222]
|
|
|
|
mean value: 0.8529992580413819
|
|
|
|
key: score_time
|
|
value: [0.20080066 0.22137713 0.20902681 0.223454 0.22496843 0.15259242
|
|
0.18264866 0.18653846 0.19532228 0.17014146]
|
|
|
|
mean value: 0.19668703079223632
|
|
|
|
key: test_mcc
|
|
value: [ 0.41666667 0.41666667 0.41666667 0.16666667 -0.16666667 0.09128709
|
|
0.16666667 -0.16666667 0.73029674 0.35355339]
|
|
|
|
mean value: 0.2425137226851023
|
|
|
|
key: train_mcc
|
|
value: [0.93649194 0.8415746 0.87298387 0.87487431 0.84173387 0.87298387
|
|
0.90873893 0.87298387 0.87298387 0.87298387]
|
|
|
|
mean value: 0.8768333002260037
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.71428571 0.71428571 0.57142857 0.42857143 0.57142857
|
|
0.57142857 0.42857143 0.85714286 0.57142857]
|
|
|
|
mean value: 0.6142857142857143
|
|
|
|
key: train_accuracy
|
|
value: [0.96825397 0.92063492 0.93650794 0.93650794 0.92063492 0.93650794
|
|
0.95238095 0.93650794 0.93650794 0.93650794]
|
|
|
|
mean value: 0.9380952380952381
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.66666667 0.66666667 0.57142857 0.33333333 0.66666667
|
|
0.57142857 0.5 0.88888889 0.4 ]
|
|
|
|
mean value: 0.5931746031746031
|
|
|
|
key: train_fscore
|
|
value: [0.96875 0.92307692 0.9375 0.93548387 0.92063492 0.93548387
|
|
0.94915254 0.93548387 0.93548387 0.93548387]
|
|
|
|
mean value: 0.9376533740923434
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.66666667 0.5 0.33333333 0.6
|
|
0.66666667 0.5 0.8 1. ]
|
|
|
|
mean value: 0.64
|
|
|
|
key: train_precision
|
|
value: [0.96875 0.90909091 0.9375 0.96666667 0.93548387 0.93548387
|
|
1. 0.93548387 0.93548387 0.93548387]
|
|
|
|
mean value: 0.9459426930596285
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 0.66666667 0.66666667 0.33333333 0.75
|
|
0.5 0.5 1. 0.25 ]
|
|
|
|
mean value: 0.6
|
|
|
|
key: train_recall
|
|
value: [0.96875 0.9375 0.9375 0.90625 0.90625 0.93548387
|
|
0.90322581 0.93548387 0.93548387 0.93548387]
|
|
|
|
mean value: 0.9301411290322581
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.70833333 0.70833333 0.58333333 0.41666667 0.54166667
|
|
0.58333333 0.41666667 0.83333333 0.625 ]
|
|
|
|
mean value: 0.6125
|
|
|
|
key: train_roc_auc
|
|
value: [0.96824597 0.9203629 0.93649194 0.93699597 0.92086694 0.93649194
|
|
0.9516129 0.93649194 0.93649194 0.93649194]
|
|
|
|
mean value: 0.9380544354838709
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.5 0.5 0.4 0.2 0.5
|
|
0.4 0.33333333 0.8 0.25 ]
|
|
|
|
mean value: 0.43833333333333335
|
|
|
|
key: train_jcc
|
|
value: [0.93939394 0.85714286 0.88235294 0.87878788 0.85294118 0.87878788
|
|
0.90322581 0.87878788 0.87878788 0.87878788]
|
|
|
|
mean value: 0.8828996114574862
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02029395 0.00822663 0.00816417 0.00813961 0.00823164 0.00823736
|
|
0.00818086 0.00820446 0.00823283 0.0082767 ]
|
|
|
|
mean value: 0.009418821334838868
|
|
|
|
key: score_time
|
|
value: [0.01400042 0.00832248 0.00841331 0.00829387 0.0082891 0.00830483
|
|
0.00829959 0.00828528 0.00829315 0.00832224]
|
|
|
|
mean value: 0.008882427215576172
|
|
|
|
key: test_mcc
|
|
value: [ 0.16666667 0.41666667 -0.16666667 -0.16666667 0.41666667 0.41666667
|
|
0.41666667 0.16666667 0.75 0.35355339]
|
|
|
|
mean value: 0.27702200572599406
|
|
|
|
key: train_mcc
|
|
value: [0.61982085 0.65120968 0.61895161 0.71471774 0.59049817 0.71471774
|
|
0.65085805 0.5892604 0.55909213 0.68865372]
|
|
|
|
mean value: 0.6397780094314123
|
|
|
|
key: test_accuracy
|
|
value: [0.57142857 0.71428571 0.42857143 0.42857143 0.71428571 0.71428571
|
|
0.71428571 0.57142857 0.85714286 0.57142857]
|
|
|
|
mean value: 0.6285714285714286
|
|
|
|
key: train_accuracy
|
|
value: [0.80952381 0.82539683 0.80952381 0.85714286 0.79365079 0.85714286
|
|
0.82539683 0.79365079 0.77777778 0.84126984]
|
|
|
|
mean value: 0.819047619047619
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.66666667 0.33333333 0.33333333 0.66666667 0.75
|
|
0.75 0.57142857 0.85714286 0.4 ]
|
|
|
|
mean value: 0.59
|
|
|
|
key: train_fscore
|
|
value: [0.81818182 0.82539683 0.8125 0.85714286 0.78688525 0.85714286
|
|
0.81967213 0.77966102 0.75862069 0.84848485]
|
|
|
|
mean value: 0.8163688290002712
|
|
|
|
key: test_precision
|
|
value: [0.5 0.66666667 0.33333333 0.33333333 0.66666667 0.75
|
|
0.75 0.66666667 1. 1. ]
|
|
|
|
mean value: 0.6666666666666666
|
|
|
|
key: train_precision
|
|
value: [0.79411765 0.83870968 0.8125 0.87096774 0.82758621 0.84375
|
|
0.83333333 0.82142857 0.81481481 0.8 ]
|
|
|
|
mean value: 0.8257207992886934
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 0.33333333 0.33333333 0.66666667 0.75
|
|
0.75 0.5 0.75 0.25 ]
|
|
|
|
mean value: 0.5666666666666667
|
|
|
|
key: train_recall
|
|
value: [0.84375 0.8125 0.8125 0.84375 0.75 0.87096774
|
|
0.80645161 0.74193548 0.70967742 0.90322581]
|
|
|
|
mean value: 0.8094758064516129
|
|
|
|
key: test_roc_auc
|
|
value: [0.58333333 0.70833333 0.41666667 0.41666667 0.70833333 0.70833333
|
|
0.70833333 0.58333333 0.875 0.625 ]
|
|
|
|
mean value: 0.6333333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.80897177 0.82560484 0.80947581 0.85735887 0.79435484 0.85735887
|
|
0.82510081 0.79284274 0.77671371 0.8422379 ]
|
|
|
|
mean value: 0.8190020161290322
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.5 0.2 0.2 0.5 0.6 0.6 0.4 0.75 0.25]
|
|
|
|
mean value: 0.44
|
|
|
|
key: train_jcc
|
|
value: [0.69230769 0.7027027 0.68421053 0.75 0.64864865 0.75
|
|
0.69444444 0.63888889 0.61111111 0.73684211]
|
|
|
|
mean value: 0.6909156119682436
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.06731701 0.03869319 0.04087901 0.03723073 0.03835511 0.03823161
|
|
0.10305953 0.07719755 0.03667355 0.03666496]
|
|
|
|
mean value: 0.05143022537231445
|
|
|
|
key: score_time
|
|
value: [0.01004529 0.01000285 0.01157188 0.00998616 0.0101099 0.01008081
|
|
0.01304054 0.01143241 0.01027799 0.01032186]
|
|
|
|
mean value: 0.010686969757080078
|
|
|
|
key: test_mcc
|
|
value: [ 0.41666667 0.41666667 0.41666667 0.54772256 0.75 -0.35355339
|
|
0.16666667 0.54772256 0.73029674 0.16666667]
|
|
|
|
mean value: 0.38055218010906133
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.71428571 0.71428571 0.71428571 0.85714286 0.42857143
|
|
0.57142857 0.71428571 0.85714286 0.57142857]
|
|
|
|
mean value: 0.6857142857142857
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.66666667 0.66666667 0.75 0.85714286 0.6
|
|
0.57142857 0.66666667 0.88888889 0.57142857]
|
|
|
|
mean value: 0.6905555555555556
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.66666667 0.6 0.75 0.5
|
|
0.66666667 1. 0.8 0.66666667]
|
|
|
|
mean value: 0.6983333333333334
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 0.66666667 1. 1. 0.75
|
|
0.5 0.5 1. 0.5 ]
|
|
|
|
mean value: 0.725
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.70833333 0.70833333 0.75 0.875 0.375
|
|
0.58333333 0.75 0.83333333 0.58333333]
|
|
|
|
mean value: 0.6875
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.5 0.5 0.6 0.75 0.42857143
|
|
0.4 0.5 0.8 0.4 ]
|
|
|
|
mean value: 0.5378571428571428
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.38
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02187252 0.03770638 0.04406595 0.04025984 0.03756094 0.03748059
|
|
0.03778553 0.03821254 0.03767872 0.03784513]
|
|
|
|
mean value: 0.03704681396484375
|
|
|
|
key: score_time
|
|
value: [0.0229826 0.03857613 0.03574252 0.02186775 0.01915789 0.02113247
|
|
0.02071667 0.02346253 0.02315164 0.01696062]
|
|
|
|
mean value: 0.024375081062316895
|
|
|
|
key: test_mcc
|
|
value: [-0.47140452 0.73029674 0.09128709 0.16666667 0.09128709 -0.54772256
|
|
0.09128709 0.47140452 0.47140452 0.16666667]
|
|
|
|
mean value: 0.12611733187120033
|
|
|
|
key: train_mcc
|
|
value: [1. 0.96871896 0.96871896 0.96875 1. 1.
|
|
1. 0.96871896 1. 0.96871896]
|
|
|
|
mean value: 0.9843625837571862
|
|
|
|
key: test_accuracy
|
|
value: [0.28571429 0.85714286 0.57142857 0.57142857 0.57142857 0.28571429
|
|
0.57142857 0.71428571 0.71428571 0.57142857]
|
|
|
|
mean value: 0.5714285714285714
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98412698 0.98412698 0.98412698 1. 1.
|
|
1. 0.98412698 1. 0.98412698]
|
|
|
|
mean value: 0.9920634920634921
|
|
|
|
key: test_fscore
|
|
value: [0.44444444 0.8 0.4 0.57142857 0.4 0.44444444
|
|
0.66666667 0.8 0.8 0.57142857]
|
|
|
|
mean value: 0.5898412698412698
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98461538 0.98461538 0.98412698 1. 1.
|
|
1. 0.98360656 1. 0.98360656]
|
|
|
|
mean value: 0.9920570868111852
|
|
|
|
key: test_precision
|
|
value: [0.33333333 1. 0.5 0.5 0.5 0.4
|
|
0.6 0.66666667 0.66666667 0.66666667]
|
|
|
|
mean value: 0.5833333333333334
|
|
|
|
key: train_precision
|
|
value: [1. 0.96969697 0.96969697 1. 1. 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9939393939393939
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 0.33333333 0.66666667 0.33333333 0.5
|
|
0.75 1. 1. 0.5 ]
|
|
|
|
mean value: 0.6416666666666666
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.96875 1. 1.
|
|
1. 0.96774194 1. 0.96774194]
|
|
|
|
mean value: 0.9904233870967742
|
|
|
|
key: test_roc_auc
|
|
value: [0.33333333 0.83333333 0.54166667 0.58333333 0.54166667 0.25
|
|
0.54166667 0.66666667 0.66666667 0.58333333]
|
|
|
|
mean value: 0.5541666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98387097 0.98387097 0.984375 1. 1.
|
|
1. 0.98387097 1. 0.98387097]
|
|
|
|
mean value: 0.9919858870967742
|
|
|
|
key: test_jcc
|
|
value: [0.28571429 0.66666667 0.25 0.4 0.25 0.28571429
|
|
0.5 0.66666667 0.66666667 0.4 ]
|
|
|
|
mean value: 0.4371428571428571
|
|
|
|
key: train_jcc
|
|
value: [1. 0.96969697 0.96969697 0.96875 1. 1.
|
|
1. 0.96774194 1. 0.96774194]
|
|
|
|
mean value: 0.9843627810361681
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.57
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0275228 0.00869274 0.00853896 0.00843048 0.00822639 0.00832152
|
|
0.00833821 0.00831127 0.0087409 0.00877428]
|
|
|
|
mean value: 0.010389757156372071
|
|
|
|
key: score_time
|
|
value: [0.00885653 0.00863767 0.00864148 0.00854325 0.00834703 0.00835729
|
|
0.00838327 0.0083456 0.00867915 0.00876808]
|
|
|
|
mean value: 0.008555936813354491
|
|
|
|
key: test_mcc
|
|
value: [0.75 0.41666667 0.75 0.54772256 0.41666667 0.47140452
|
|
0.41666667 0.09128709 1. 0.35355339]
|
|
|
|
mean value: 0.5213967561806999
|
|
|
|
key: train_mcc
|
|
value: [0.62939541 0.65315611 0.55909213 0.65315611 0.65821474 0.65419917
|
|
0.62475802 0.55544355 0.5957539 0.74772995]
|
|
|
|
mean value: 0.6330899075832417
|
|
|
|
key: test_accuracy
|
|
value: [0.85714286 0.71428571 0.85714286 0.71428571 0.71428571 0.71428571
|
|
0.71428571 0.57142857 1. 0.57142857]
|
|
|
|
mean value: 0.7428571428571429
|
|
|
|
key: train_accuracy
|
|
value: [0.80952381 0.82539683 0.77777778 0.82539683 0.82539683 0.82539683
|
|
0.80952381 0.77777778 0.79365079 0.87301587]
|
|
|
|
mean value: 0.8142857142857143
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.66666667 0.85714286 0.75 0.66666667 0.8
|
|
0.75 0.66666667 1. 0.4 ]
|
|
|
|
mean value: 0.7414285714285714
|
|
|
|
key: train_fscore
|
|
value: [0.82857143 0.8358209 0.79411765 0.8358209 0.84057971 0.83076923
|
|
0.81818182 0.77419355 0.80597015 0.875 ]
|
|
|
|
mean value: 0.8239025323411833
|
|
|
|
key: test_precision
|
|
value: [0.75 0.66666667 0.75 0.6 0.66666667 0.66666667
|
|
0.75 0.6 1. 1. ]
|
|
|
|
mean value: 0.745
|
|
|
|
key: train_precision
|
|
value: [0.76315789 0.8 0.75 0.8 0.78378378 0.79411765
|
|
0.77142857 0.77419355 0.75 0.84848485]
|
|
|
|
mean value: 0.7835166293879966
|
|
|
|
key: test_recall
|
|
value: [1. 0.66666667 1. 1. 0.66666667 1.
|
|
0.75 0.75 1. 0.25 ]
|
|
|
|
mean value: 0.8083333333333333
|
|
|
|
key: train_recall
|
|
value: [0.90625 0.875 0.84375 0.875 0.90625 0.87096774
|
|
0.87096774 0.77419355 0.87096774 0.90322581]
|
|
|
|
mean value: 0.8696572580645161
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.70833333 0.875 0.75 0.70833333 0.66666667
|
|
0.70833333 0.54166667 1. 0.625 ]
|
|
|
|
mean value: 0.7458333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.80796371 0.82459677 0.77671371 0.82459677 0.82409274 0.82610887
|
|
0.81048387 0.77772177 0.79485887 0.8734879 ]
|
|
|
|
mean value: 0.8140625
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.5 0.75 0.6 0.5 0.66666667
|
|
0.6 0.5 1. 0.25 ]
|
|
|
|
mean value: 0.6116666666666667
|
|
|
|
key: train_jcc
|
|
value: [0.70731707 0.71794872 0.65853659 0.71794872 0.725 0.71052632
|
|
0.69230769 0.63157895 0.675 0.77777778]
|
|
|
|
mean value: 0.7013941827677386
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00936675 0.01185012 0.01197433 0.01253963 0.01194286 0.01255822
|
|
0.01267052 0.01219106 0.01249242 0.01214719]
|
|
|
|
mean value: 0.011973309516906738
|
|
|
|
key: score_time
|
|
value: [0.00917697 0.01127982 0.01137233 0.01138282 0.01140451 0.01137805
|
|
0.01142406 0.01142311 0.01134992 0.01129985]
|
|
|
|
mean value: 0.011149144172668457
|
|
|
|
key: test_mcc
|
|
value: [ 0.41666667 0.73029674 0.41666667 0.54772256 0.47140452 0.
|
|
0.73029674 -0.09128709 0.41666667 0.35355339]
|
|
|
|
mean value: 0.3991986862652387
|
|
|
|
key: train_mcc
|
|
value: [0.90873893 0.57759945 0.45268961 0.90514678 0.6712536 0.84530217
|
|
0.77211 0.78719616 0.8415746 0.79833297]
|
|
|
|
mean value: 0.7559944262614172
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.85714286 0.71428571 0.71428571 0.71428571 0.57142857
|
|
0.85714286 0.42857143 0.71428571 0.57142857]
|
|
|
|
mean value: 0.6857142857142857
|
|
|
|
key: train_accuracy
|
|
value: [0.95238095 0.76190476 0.66666667 0.95238095 0.80952381 0.92063492
|
|
0.87301587 0.88888889 0.92063492 0.88888889]
|
|
|
|
mean value: 0.8634920634920634
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.8 0.66666667 0.75 0.5 0.72727273
|
|
0.88888889 0.33333333 0.75 0.4 ]
|
|
|
|
mean value: 0.6482828282828282
|
|
|
|
key: train_fscore
|
|
value: [0.95522388 0.70588235 0.51162791 0.95384615 0.76923077 0.92307692
|
|
0.88571429 0.87719298 0.91803279 0.89855072]
|
|
|
|
mean value: 0.8398378766362135
|
|
|
|
key: test_precision
|
|
value: [0.66666667 1. 0.66666667 0.6 1. 0.57142857
|
|
0.8 0.5 0.75 1. ]
|
|
|
|
mean value: 0.7554761904761904
|
|
|
|
key: train_precision
|
|
value: [0.91428571 0.94736842 1. 0.93939394 1. 0.88235294
|
|
0.79487179 0.96153846 0.93333333 0.81578947]
|
|
|
|
mean value: 0.9188934079336556
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 0.66666667 1. 0.33333333 1.
|
|
1. 0.25 0.75 0.25 ]
|
|
|
|
mean value: 0.6583333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 0.5625 0.34375 0.96875 0.625 0.96774194
|
|
1. 0.80645161 0.90322581 1. ]
|
|
|
|
mean value: 0.817741935483871
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.83333333 0.70833333 0.75 0.66666667 0.5
|
|
0.83333333 0.45833333 0.70833333 0.625 ]
|
|
|
|
mean value: 0.6791666666666667
|
|
|
|
key: train_roc_auc
|
|
value: [0.9516129 0.76512097 0.671875 0.95211694 0.8125 0.92137097
|
|
0.875 0.88760081 0.9203629 0.890625 ]
|
|
|
|
mean value: 0.8648185483870968
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.66666667 0.5 0.6 0.33333333 0.57142857
|
|
0.8 0.2 0.6 0.25 ]
|
|
|
|
mean value: 0.5021428571428571
|
|
|
|
key: train_jcc
|
|
value: [0.91428571 0.54545455 0.34375 0.91176471 0.625 0.85714286
|
|
0.79487179 0.78125 0.84848485 0.81578947]
|
|
|
|
mean value: 0.7437793939806323
|
|
|
|
MCC on Blind test: 0.53
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01224613 0.01240611 0.01230764 0.01245379 0.01265526 0.01214504
|
|
0.01267958 0.01280499 0.02860785 0.02842355]
|
|
|
|
mean value: 0.015672993659973145
|
|
|
|
key: score_time
|
|
value: [0.01139903 0.01142216 0.01139045 0.0113802 0.0114193 0.01135778
|
|
0.01353288 0.01328421 0.02782512 0.01303291]
|
|
|
|
mean value: 0.013604402542114258
|
|
|
|
key: test_mcc
|
|
value: [ 0.41666667 0.73029674 0.75 0.16666667 0.73029674 0.09128709
|
|
0.41666667 -0.41666667 0.41666667 0.35355339]
|
|
|
|
mean value: 0.3655433970191244
|
|
|
|
key: train_mcc
|
|
value: [0.79833297 0.74634526 0.82408564 0.96875 0.85168687 0.93844649
|
|
0.87298387 0.72270545 0.8415746 0.79833297]
|
|
|
|
mean value: 0.8363244121244525
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.85714286 0.85714286 0.57142857 0.85714286 0.57142857
|
|
0.71428571 0.28571429 0.71428571 0.57142857]
|
|
|
|
mean value: 0.6714285714285714
|
|
|
|
key: train_accuracy
|
|
value: [0.88888889 0.85714286 0.9047619 0.98412698 0.92063492 0.96825397
|
|
0.93650794 0.85714286 0.92063492 0.88888889]
|
|
|
|
mean value: 0.9126984126984127
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.8 0.85714286 0.57142857 0.8 0.66666667
|
|
0.75 0.28571429 0.75 0.4 ]
|
|
|
|
mean value: 0.6547619047619048
|
|
|
|
key: train_fscore
|
|
value: [0.87719298 0.83636364 0.91428571 0.98412698 0.92753623 0.96875
|
|
0.93548387 0.84210526 0.91803279 0.89855072]
|
|
|
|
mean value: 0.9102428194765096
|
|
|
|
key: test_precision
|
|
value: [0.66666667 1. 0.75 0.5 1. 0.6
|
|
0.75 0.33333333 0.75 1. ]
|
|
|
|
mean value: 0.735
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.84210526 1. 0.86486486 0.93939394
|
|
0.93548387 0.92307692 0.93333333 0.81578947]
|
|
|
|
mean value: 0.9254047668478907
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 1. 0.66666667 0.66666667 0.75
|
|
0.75 0.25 0.75 0.25 ]
|
|
|
|
mean value: 0.6416666666666666
|
|
|
|
key: train_recall
|
|
value: [0.78125 0.71875 1. 0.96875 1. 1.
|
|
0.93548387 0.77419355 0.90322581 1. ]
|
|
|
|
mean value: 0.9081653225806452
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.83333333 0.875 0.58333333 0.83333333 0.54166667
|
|
0.70833333 0.29166667 0.70833333 0.625 ]
|
|
|
|
mean value: 0.6708333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.890625 0.859375 0.90322581 0.984375 0.91935484 0.96875
|
|
0.93649194 0.85584677 0.9203629 0.890625 ]
|
|
|
|
mean value: 0.9129032258064516
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.66666667 0.75 0.4 0.66666667 0.5
|
|
0.6 0.16666667 0.6 0.25 ]
|
|
|
|
mean value: 0.51
|
|
|
|
key: train_jcc
|
|
value: [0.78125 0.71875 0.84210526 0.96875 0.86486486 0.93939394
|
|
0.87878788 0.72727273 0.84848485 0.81578947]
|
|
|
|
mean value: 0.8385448995646364
|
|
|
|
MCC on Blind test: 0.39
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09044528 0.07365727 0.07433462 0.07485151 0.07472205 0.07907557
|
|
0.0734818 0.07333899 0.07538867 0.07335329]
|
|
|
|
mean value: 0.07626490592956543
|
|
|
|
key: score_time
|
|
value: [0.01524663 0.01452994 0.0148325 0.0153861 0.0152564 0.01451254
|
|
0.01425242 0.01427031 0.01421475 0.01420379]
|
|
|
|
mean value: 0.014670538902282714
|
|
|
|
key: test_mcc
|
|
value: [ 0.41666667 0.75 0.75 0.54772256 0.73029674 0.09128709
|
|
0.16666667 -0.16666667 -0.16666667 -0.09128709]
|
|
|
|
mean value: 0.30280193008453876
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.85714286 0.85714286 0.71428571 0.85714286 0.57142857
|
|
0.57142857 0.42857143 0.42857143 0.42857143]
|
|
|
|
mean value: 0.6428571428571428
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.85714286 0.85714286 0.75 0.8 0.66666667
|
|
0.57142857 0.5 0.5 0.33333333]
|
|
|
|
mean value: 0.6502380952380952
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.75 0.75 0.6 1. 0.6
|
|
0.66666667 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.6533333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 1. 1. 0.66666667 0.75
|
|
0.5 0.5 0.5 0.25 ]
|
|
|
|
mean value: 0.6833333333333333
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.875 0.875 0.75 0.83333333 0.54166667
|
|
0.58333333 0.41666667 0.41666667 0.45833333]
|
|
|
|
mean value: 0.6458333333333334
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.75 0.75 0.6 0.66666667 0.5
|
|
0.4 0.33333333 0.33333333 0.2 ]
|
|
|
|
mean value: 0.5033333333333333
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.35
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02794838 0.02676868 0.03286052 0.03834462 0.0278101 0.03556156
|
|
0.04615283 0.02656531 0.03106761 0.04069853]
|
|
|
|
mean value: 0.03337781429290772
|
|
|
|
key: score_time
|
|
value: [0.01677585 0.01702571 0.0370934 0.01686287 0.01918793 0.04119301
|
|
0.02168083 0.02007699 0.02238178 0.02312326]
|
|
|
|
mean value: 0.023540163040161134
|
|
|
|
key: test_mcc
|
|
value: [ 0.41666667 0.73029674 0.75 0.16666667 -0.16666667 -0.35355339
|
|
0.41666667 0.09128709 0.73029674 0.54772256]
|
|
|
|
mean value: 0.3329383079843196
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 0.96875 1.
|
|
0.96871896 0.96871896 1. 0.96871896]
|
|
|
|
mean value: 0.9874906878178897
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.85714286 0.85714286 0.57142857 0.42857143 0.42857143
|
|
0.71428571 0.57142857 0.85714286 0.71428571]
|
|
|
|
mean value: 0.6714285714285714
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 0.98412698 1.
|
|
0.98412698 0.98412698 1. 0.98412698]
|
|
|
|
mean value: 0.9936507936507937
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.8 0.85714286 0.57142857 0.33333333 0.6
|
|
0.75 0.66666667 0.88888889 0.66666667]
|
|
|
|
mean value: 0.6800793650793651
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 0.98412698 1.
|
|
0.98360656 0.98360656 1. 0.98360656]
|
|
|
|
mean value: 0.9934946656258131
|
|
|
|
key: test_precision
|
|
value: [0.66666667 1. 0.75 0.5 0.33333333 0.5
|
|
0.75 0.6 0.8 1. ]
|
|
|
|
mean value: 0.69
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 1. 0.66666667 0.33333333 0.75
|
|
0.75 0.75 1. 0.5 ]
|
|
|
|
mean value: 0.7083333333333334
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 0.96875 1.
|
|
0.96774194 0.96774194 1. 0.96774194]
|
|
|
|
mean value: 0.9871975806451613
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.83333333 0.875 0.58333333 0.41666667 0.375
|
|
0.70833333 0.54166667 0.83333333 0.75 ]
|
|
|
|
mean value: 0.6625
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 0.984375 1.
|
|
0.98387097 0.98387097 1. 0.98387097]
|
|
|
|
mean value: 0.9935987903225807
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.66666667 0.75 0.4 0.2 0.42857143
|
|
0.6 0.5 0.8 0.5 ]
|
|
|
|
mean value: 0.5345238095238095
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 0.96875 1.
|
|
0.96774194 0.96774194 1. 0.96774194]
|
|
|
|
mean value: 0.9871975806451613
|
|
|
|
MCC on Blind test: 0.44
|
|
|
|
Accuracy on Blind test: 0.72
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0123353 0.01126575 0.01034904 0.01009893 0.01044917 0.01010227
|
|
0.01013827 0.01015067 0.01009631 0.01024008]
|
|
|
|
mean value: 0.010522580146789551
|
|
|
|
key: score_time
|
|
value: [0.01079106 0.0091629 0.00841069 0.00849533 0.00873351 0.00851822
|
|
0.00845027 0.00842023 0.00856018 0.00851512]
|
|
|
|
mean value: 0.00880575180053711
|
|
|
|
key: test_mcc
|
|
value: [ 0.41666667 0.73029674 0.75 0.54772256 0.09128709 0.73029674
|
|
0.41666667 -0.16666667 0.73029674 0.35355339]
|
|
|
|
mean value: 0.46001199377032986
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.85714286 0.85714286 0.71428571 0.57142857 0.85714286
|
|
0.71428571 0.42857143 0.85714286 0.57142857]
|
|
|
|
mean value: 0.7142857142857143
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.8 0.85714286 0.75 0.4 0.88888889
|
|
0.75 0.5 0.88888889 0.4 ]
|
|
|
|
mean value: 0.6901587301587302
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 1. 0.75 0.6 0.5 0.8
|
|
0.75 0.5 0.8 1. ]
|
|
|
|
mean value: 0.7366666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 1. 1. 0.33333333 1.
|
|
0.75 0.5 1. 0.25 ]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.83333333 0.875 0.75 0.54166667 0.83333333
|
|
0.70833333 0.41666667 0.83333333 0.625 ]
|
|
|
|
mean value: 0.7125
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.66666667 0.75 0.6 0.25 0.8
|
|
0.6 0.33333333 0.8 0.25 ]
|
|
|
|
mean value: 0.555
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.27
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.16264439 0.15226102 0.15841031 0.15225506 0.15178418 0.15091753
|
|
0.15388393 0.15036559 0.15182185 0.15233397]
|
|
|
|
mean value: 0.15366778373718262
|
|
|
|
key: score_time
|
|
value: [0.00922966 0.00966811 0.00884342 0.00883937 0.00902224 0.00875044
|
|
0.00882006 0.00890803 0.00886679 0.00875664]
|
|
|
|
mean value: 0.00897047519683838
|
|
|
|
key: test_mcc
|
|
value: [ 0.41666667 0.41666667 0.75 0.54772256 0.73029674 -0.35355339
|
|
0.41666667 0.41666667 0.75 0.16666667]
|
|
|
|
mean value: 0.4257799243585447
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.71428571 0.85714286 0.71428571 0.85714286 0.42857143
|
|
0.71428571 0.71428571 0.85714286 0.57142857]
|
|
|
|
mean value: 0.7142857142857143
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.66666667 0.85714286 0.75 0.8 0.6
|
|
0.75 0.75 0.85714286 0.57142857]
|
|
|
|
mean value: 0.7269047619047619
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.75 0.6 1. 0.5
|
|
0.75 0.75 1. 0.66666667]
|
|
|
|
mean value: 0.735
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 1. 1. 0.66666667 0.75
|
|
0.75 0.75 0.75 0.5 ]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.70833333 0.875 0.75 0.83333333 0.375
|
|
0.70833333 0.70833333 0.875 0.58333333]
|
|
|
|
mean value: 0.7125
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.5 0.75 0.6 0.66666667 0.42857143
|
|
0.6 0.6 0.75 0.4 ]
|
|
|
|
mean value: 0.5795238095238096
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.37
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.00914645 0.01261806 0.0128572 0.01263332 0.01288128 0.01266718
|
|
0.01286745 0.0129776 0.01280403 0.01338506]
|
|
|
|
mean value: 0.012483763694763183
|
|
|
|
key: score_time
|
|
value: [0.0085032 0.0114584 0.01143193 0.01141334 0.01139045 0.01379275
|
|
0.01370335 0.01156902 0.01408124 0.01405096]
|
|
|
|
mean value: 0.012139463424682617
|
|
|
|
key: test_mcc
|
|
value: [-0.09128709 1. 0.16666667 0.16666667 -0.09128709 0.41666667
|
|
-0.75 0. -0.41666667 0.54772256]
|
|
|
|
mean value: 0.0948481705003444
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.42857143 1. 0.57142857 0.57142857 0.42857143 0.71428571
|
|
0.14285714 0.57142857 0.28571429 0.71428571]
|
|
|
|
mean value: 0.5428571428571428
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.5 1. 0.57142857 0.57142857 0.5 0.75
|
|
0.25 0.72727273 0.28571429 0.66666667]
|
|
|
|
mean value: 0.5822510822510822
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.4 1. 0.5 0.5 0.4 0.75
|
|
0.25 0.57142857 0.33333333 1. ]
|
|
|
|
mean value: 0.5704761904761905
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.66666667 1. 0.66666667 0.66666667 0.66666667 0.75
|
|
0.25 1. 0.25 0.5 ]
|
|
|
|
mean value: 0.6416666666666666
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.45833333 1. 0.58333333 0.58333333 0.45833333 0.70833333
|
|
0.125 0.5 0.29166667 0.75 ]
|
|
|
|
mean value: 0.5458333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 1. 0.4 0.4 0.33333333 0.6
|
|
0.14285714 0.57142857 0.16666667 0.5 ]
|
|
|
|
mean value: 0.44476190476190475
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01274872 0.0123713 0.01229191 0.01231217 0.01228619 0.01528502
|
|
0.01234388 0.01238465 0.01233101 0.0123322 ]
|
|
|
|
mean value: 0.012668704986572266
|
|
|
|
key: score_time
|
|
value: [0.0114243 0.0113318 0.01136684 0.01140285 0.01176333 0.01134515
|
|
0.01131439 0.01137829 0.01128244 0.01130056]
|
|
|
|
mean value: 0.011390995979309083
|
|
|
|
key: test_mcc
|
|
value: [ 0.41666667 0.41666667 0.75 0.16666667 0.47140452 -0.35355339
|
|
0.73029674 0.41666667 0.47140452 0.54772256]
|
|
|
|
mean value: 0.40339416185008437
|
|
|
|
key: train_mcc
|
|
value: [0.96871896 0.93649194 0.93649194 0.96875 0.93649194 0.96875
|
|
0.96875 0.93649194 0.93649194 0.96875 ]
|
|
|
|
mean value: 0.952617863681232
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.71428571 0.85714286 0.57142857 0.71428571 0.42857143
|
|
0.85714286 0.71428571 0.71428571 0.71428571]
|
|
|
|
mean value: 0.7
|
|
|
|
key: train_accuracy
|
|
value: [0.98412698 0.96825397 0.96825397 0.98412698 0.96825397 0.98412698
|
|
0.98412698 0.96825397 0.96825397 0.98412698]
|
|
|
|
mean value: 0.9761904761904762
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.66666667 0.85714286 0.57142857 0.5 0.6
|
|
0.88888889 0.75 0.8 0.66666667]
|
|
|
|
mean value: 0.6967460317460318
|
|
|
|
key: train_fscore
|
|
value: [0.98461538 0.96875 0.96875 0.98412698 0.96875 0.98412698
|
|
0.98412698 0.96774194 0.96774194 0.98412698]
|
|
|
|
mean value: 0.9762857192091063
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.75 0.5 1. 0.5
|
|
0.8 0.75 0.66666667 1. ]
|
|
|
|
mean value: 0.73
|
|
|
|
key: train_precision
|
|
value: [0.96969697 0.96875 0.96875 1. 0.96875 0.96875
|
|
0.96875 0.96774194 0.96774194 0.96875 ]
|
|
|
|
mean value: 0.9717680840664712
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 1. 0.66666667 0.33333333 0.75
|
|
1. 0.75 1. 0.5 ]
|
|
|
|
mean value: 0.7333333333333333
|
|
|
|
key: train_recall
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_rt.py:175: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_rt.py:178: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[1. 0.96875 0.96875 0.96875 0.96875 1.
|
|
1. 0.96774194 0.96774194 1. ]
|
|
|
|
mean value: 0.9810483870967742
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.70833333 0.875 0.58333333 0.66666667 0.375
|
|
0.83333333 0.70833333 0.66666667 0.75 ]
|
|
|
|
mean value: 0.6875
|
|
|
|
key: train_roc_auc
|
|
value: [0.98387097 0.96824597 0.96824597 0.984375 0.96824597 0.984375
|
|
0.984375 0.96824597 0.96824597 0.984375 ]
|
|
|
|
mean value: 0.9762600806451613
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.5 0.75 0.4 0.33333333 0.42857143
|
|
0.8 0.6 0.66666667 0.5 ]
|
|
|
|
mean value: 0.5478571428571428
|
|
|
|
key: train_jcc
|
|
value: [0.96969697 0.93939394 0.93939394 0.96875 0.93939394 0.96875
|
|
0.96875 0.9375 0.9375 0.96875 ]
|
|
|
|
mean value: 0.9537878787878789
|
|
|
|
MCC on Blind test: 0.42
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.08231831 0.08137202 0.08322096 0.08132386 0.08178163 0.08155179
|
|
0.08170795 0.08200383 0.08465338 0.1203742 ]
|
|
|
|
mean value: 0.08603079319000244
|
|
|
|
key: score_time
|
|
value: [0.01158738 0.01150417 0.01153541 0.01151085 0.01156425 0.01151228
|
|
0.01149774 0.01154709 0.01159549 0.02400136]
|
|
|
|
mean value: 0.012785601615905761
|
|
|
|
key: test_mcc
|
|
value: [ 0.41666667 0.41666667 0.75 0.54772256 0.73029674 -0.35355339
|
|
0.73029674 0.16666667 0.47140452 0.35355339]
|
|
|
|
mean value: 0.42297205649766406
|
|
|
|
key: train_mcc
|
|
value: [0.81572458 0.75156646 0.78719616 0.87462485 0.72270545 0.96875
|
|
0.75254943 0.81644514 0.93649194 0.84530217]
|
|
|
|
mean value: 0.8271356177461342
|
|
|
|
key: test_accuracy
|
|
value: [0.71428571 0.71428571 0.85714286 0.71428571 0.85714286 0.42857143
|
|
0.85714286 0.57142857 0.71428571 0.57142857]
|
|
|
|
mean value: 0.7
|
|
|
|
key: train_accuracy
|
|
value: [0.9047619 0.87301587 0.88888889 0.93650794 0.85714286 0.98412698
|
|
0.87301587 0.9047619 0.96825397 0.92063492]
|
|
|
|
mean value: 0.9111111111111111
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.66666667 0.85714286 0.75 0.8 0.6
|
|
0.88888889 0.57142857 0.8 0.4 ]
|
|
|
|
mean value: 0.7000793650793651
|
|
|
|
key: train_fscore
|
|
value: [0.91176471 0.88235294 0.89855072 0.93939394 0.86956522 0.98412698
|
|
0.87878788 0.90909091 0.96774194 0.92307692]
|
|
|
|
mean value: 0.9164452159048314
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.75 0.6 1. 0.5
|
|
0.8 0.66666667 0.66666667 1. ]
|
|
|
|
mean value: 0.7316666666666667
|
|
|
|
key: train_precision
|
|
value: [0.86111111 0.83333333 0.83783784 0.91176471 0.81081081 0.96875
|
|
0.82857143 0.85714286 0.96774194 0.88235294]
|
|
|
|
mean value: 0.8759416961350074
|
|
|
|
key: test_recall
|
|
value: [0.66666667 0.66666667 1. 1. 0.66666667 0.75
|
|
1. 0.5 1. 0.25 ]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_recall
|
|
value: [0.96875 0.9375 0.96875 0.96875 0.9375 1.
|
|
0.93548387 0.96774194 0.96774194 0.96774194]
|
|
|
|
mean value: 0.9619959677419355
|
|
|
|
key: test_roc_auc
|
|
value: [0.70833333 0.70833333 0.875 0.75 0.83333333 0.375
|
|
0.83333333 0.58333333 0.66666667 0.625 ]
|
|
|
|
mean value: 0.6958333333333333
|
|
|
|
key: train_roc_auc
|
|
value: [0.90372984 0.87197581 0.88760081 0.9359879 0.85584677 0.984375
|
|
0.87399194 0.90574597 0.96824597 0.92137097]
|
|
|
|
mean value: 0.9108870967741935
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.5 0.75 0.6 0.66666667 0.42857143
|
|
0.8 0.4 0.66666667 0.25 ]
|
|
|
|
mean value: 0.5561904761904762
|
|
|
|
key: train_jcc
|
|
value: [0.83783784 0.78947368 0.81578947 0.88571429 0.76923077 0.96875
|
|
0.78378378 0.83333333 0.9375 0.85714286]
|
|
|
|
mean value: 0.8478556024937604
|
|
|
|
MCC on Blind test: 0.45
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03537726 0.03772926 0.04028106 0.03631926 0.03657913 0.03692508
|
|
0.03666854 0.03805637 0.03697419 0.03697133]
|
|
|
|
mean value: 0.037188148498535155
|
|
|
|
key: score_time
|
|
value: [0.01190233 0.01362658 0.01195407 0.01399827 0.01385236 0.01390648
|
|
0.01384187 0.01401973 0.0140295 0.01402974]
|
|
|
|
mean value: 0.013516092300415039
|
|
|
|
key: test_mcc
|
|
value: [0.85238636 0.79833297 0.82507166 0.82507166 0.77822581 0.93832585
|
|
0.90873893 0.90873893 0.96871896 0.85168687]
|
|
|
|
mean value: 0.8655298005601736
|
|
|
|
key: train_mcc
|
|
value: [0.90242401 0.90564657 0.91536977 0.90887831 0.9001672 0.90567804
|
|
0.90567804 0.90245767 0.89604431 0.91539508]
|
|
|
|
mean value: 0.9057738986098084
|
|
|
|
key: test_accuracy
|
|
value: [0.92063492 0.88888889 0.9047619 0.9047619 0.88888889 0.96825397
|
|
0.95238095 0.95238095 0.98412698 0.92063492]
|
|
|
|
mean value: 0.9285714285714285
|
|
|
|
key: train_accuracy
|
|
value: [0.94885362 0.95061728 0.95590829 0.95238095 0.94885362 0.95061728
|
|
0.95061728 0.94885362 0.94532628 0.95590829]
|
|
|
|
mean value: 0.9507936507936507
|
|
|
|
key: test_fscore
|
|
value: [0.92537313 0.89855072 0.91176471 0.91176471 0.88888889 0.96969697
|
|
0.95522388 0.95522388 0.98461538 0.92753623]
|
|
|
|
mean value: 0.9328638507010076
|
|
|
|
key: train_fscore
|
|
value: [0.95142379 0.95302013 0.95784148 0.95462185 0.95076401 0.95286195
|
|
0.95286195 0.9512605 0.9480737 0.95769882]
|
|
|
|
mean value: 0.9530428186668227
|
|
|
|
key: test_precision
|
|
value: [0.86111111 0.81578947 0.83783784 0.83783784 0.875 0.94117647
|
|
0.91428571 0.91428571 0.96969697 0.86486486]
|
|
|
|
mean value: 0.8831885994192495
|
|
|
|
key: train_precision
|
|
value: [0.90734824 0.91025641 0.91909385 0.91318328 0.91803279 0.90996785
|
|
0.90996785 0.90705128 0.90127389 0.91883117]
|
|
|
|
mean value: 0.9115006598379707
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.90322581 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9903225806451613
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 0.98591549 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9985915492957746
|
|
|
|
key: test_roc_auc
|
|
value: [0.921875 0.890625 0.90625 0.90625 0.8891129 0.96774194
|
|
0.9516129 0.9516129 0.98387097 0.91935484]
|
|
|
|
mean value: 0.9288306451612903
|
|
|
|
key: train_roc_auc
|
|
value: [0.94876325 0.95053004 0.95583039 0.95229682 0.94878814 0.95070423
|
|
0.95070423 0.94894366 0.94542254 0.95598592]
|
|
|
|
mean value: 0.9507969193251381
|
|
|
|
key: test_jcc
|
|
value: [0.86111111 0.81578947 0.83783784 0.83783784 0.8 0.94117647
|
|
0.91428571 0.91428571 0.96969697 0.86486486]
|
|
|
|
mean value: 0.8756885994192496
|
|
|
|
key: train_jcc
|
|
value: [0.90734824 0.91025641 0.91909385 0.91318328 0.90614887 0.90996785
|
|
0.90996785 0.90705128 0.90127389 0.91883117]
|
|
|
|
mean value: 0.9103122678808376
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.86072397 0.99515605 0.87675071 1.05883574 0.87704158 0.8813293
|
|
1.09496379 0.8876586 1.02215767 0.91126871]
|
|
|
|
mean value: 0.9465886116027832
|
|
|
|
key: score_time
|
|
value: [0.01844168 0.01409984 0.01407838 0.01407361 0.01442885 0.01418662
|
|
0.01216507 0.01816702 0.0191226 0.01435494]
|
|
|
|
mean value: 0.015311861038208007
|
|
|
|
key: test_mcc
|
|
value: [0.90900317 0.72098341 0.88034084 0.90900317 0.88034084 0.96871896
|
|
0.93832585 0.87988269 0.96871896 0.93832585]
|
|
|
|
mean value: 0.8993643751544609
|
|
|
|
key: train_mcc
|
|
value: [1. 0.99647883 0.92517842 0.97905443 1. 0.98251662
|
|
1. 1. 0.94842295 1. ]
|
|
|
|
mean value: 0.9831651251073872
|
|
|
|
key: test_accuracy
|
|
value: [0.95238095 0.84126984 0.93650794 0.95238095 0.93650794 0.98412698
|
|
0.96825397 0.93650794 0.98412698 0.96825397]
|
|
|
|
mean value: 0.946031746031746
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.99823633 0.96119929 0.98941799 1. 0.99118166
|
|
1. 1. 0.97354497 1. ]
|
|
|
|
mean value: 0.991358024691358
|
|
|
|
key: test_fscore
|
|
value: [0.95384615 0.86111111 0.93939394 0.95384615 0.93939394 0.98461538
|
|
0.96969697 0.94117647 0.98461538 0.96969697]
|
|
|
|
mean value: 0.9497392476804242
|
|
|
|
key: train_fscore
|
|
value: [1. 0.99824253 0.96271186 0.98954704 1. 0.99124343
|
|
1. 1. 0.97418244 1. ]
|
|
|
|
mean value: 0.991592731012641
|
|
|
|
key: test_precision
|
|
value: [0.91176471 0.75609756 0.88571429 0.91176471 0.88571429 0.96969697
|
|
0.94117647 0.88888889 0.96969697 0.94117647]
|
|
|
|
mean value: 0.9061691313628186
|
|
|
|
key: train_precision
|
|
value: [1. 0.99649123 0.92810458 0.97931034 1. 0.98263889
|
|
1. 1. 0.94966443 1. ]
|
|
|
|
mean value: 0.983620946648025
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.953125 0.84375 0.9375 0.953125 0.9375 0.98387097
|
|
0.96774194 0.93548387 0.98387097 0.96774194]
|
|
|
|
mean value: 0.9463709677419355
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99823322 0.96113074 0.98939929 1. 0.99119718
|
|
1. 1. 0.97359155 1. ]
|
|
|
|
mean value: 0.9913551983277759
|
|
|
|
key: test_jcc
|
|
value: [0.91176471 0.75609756 0.88571429 0.91176471 0.88571429 0.96969697
|
|
0.94117647 0.88888889 0.96969697 0.94117647]
|
|
|
|
mean value: 0.9061691313628186
|
|
|
|
key: train_jcc
|
|
value: [1. 0.99649123 0.92810458 0.97931034 1. 0.98263889
|
|
1. 1. 0.94966443 1. ]
|
|
|
|
mean value: 0.983620946648025
|
|
|
|
MCC on Blind test: 0.24
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01441288 0.01075721 0.0103128 0.01015496 0.01005268 0.01003385
|
|
0.01002359 0.01025057 0.01006818 0.01003981]
|
|
|
|
mean value: 0.010610651969909669
|
|
|
|
key: score_time
|
|
value: [0.01634812 0.00925112 0.00887322 0.0087502 0.00869656 0.00868344
|
|
0.00874066 0.00874066 0.00872993 0.00869584]
|
|
|
|
mean value: 0.009550976753234863
|
|
|
|
key: test_mcc
|
|
value: [0.5018715 0.57427105 0.72098341 0.74634526 0.65991202 0.69290694
|
|
0.47458835 0.59279592 0.5180609 0.64257546]
|
|
|
|
mean value: 0.6124310799307242
|
|
|
|
key: train_mcc
|
|
value: [0.64728299 0.63631047 0.63357196 0.62809992 0.61913175 0.6258883
|
|
0.63443281 0.63680058 0.64500382 0.63134107]
|
|
|
|
mean value: 0.6337863670632644
|
|
|
|
key: test_accuracy
|
|
value: [0.6984127 0.74603175 0.84126984 0.85714286 0.82539683 0.82539683
|
|
0.71428571 0.76190476 0.71428571 0.79365079]
|
|
|
|
mean value: 0.7777777777777778
|
|
|
|
key: train_accuracy
|
|
value: [0.79541446 0.78835979 0.78659612 0.78306878 0.78306878 0.78130511
|
|
0.79365079 0.78835979 0.79365079 0.78483245]
|
|
|
|
mean value: 0.7878306878306878
|
|
|
|
key: test_fscore
|
|
value: [0.7654321 0.79487179 0.86111111 0.87323944 0.8358209 0.85333333
|
|
0.76923077 0.81012658 0.7804878 0.83116883]
|
|
|
|
mean value: 0.8174822657779908
|
|
|
|
key: train_fscore
|
|
value: [0.83040936 0.8255814 0.82438316 0.82199711 0.81991215 0.82028986
|
|
0.82615156 0.82507289 0.82869693 0.82267442]
|
|
|
|
mean value: 0.8245168819475409
|
|
|
|
key: test_precision
|
|
value: [0.62 0.65957447 0.75609756 0.775 0.77777778 0.74418605
|
|
0.65217391 0.68085106 0.64 0.71111111]
|
|
|
|
mean value: 0.7016771941334499
|
|
|
|
key: train_precision
|
|
value: [0.71 0.7029703 0.70123457 0.6977887 0.70175439 0.6953317
|
|
0.71282051 0.70223325 0.7075 0.69876543]
|
|
|
|
mean value: 0.7030398839555868
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.90322581 1.
|
|
0.9375 1. 1. 1. ]
|
|
|
|
mean value: 0.9840725806451613
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 0.98591549 1.
|
|
0.98233216 1. 1. 1. ]
|
|
|
|
mean value: 0.9968247648434778
|
|
|
|
key: test_roc_auc
|
|
value: [0.703125 0.75 0.84375 0.859375 0.8266129 0.82258065
|
|
0.71068548 0.75806452 0.70967742 0.79032258]
|
|
|
|
mean value: 0.7774193548387097
|
|
|
|
key: train_roc_auc
|
|
value: [0.795053 0.78798587 0.78621908 0.78268551 0.7827104 0.78169014
|
|
0.79398298 0.78873239 0.79401408 0.78521127]
|
|
|
|
mean value: 0.7878284726023989
|
|
|
|
key: test_jcc
|
|
value: [0.62 0.65957447 0.75609756 0.775 0.71794872 0.74418605
|
|
0.625 0.68085106 0.64 0.71111111]
|
|
|
|
mean value: 0.692976896846196
|
|
|
|
key: train_jcc
|
|
value: [0.71 0.7029703 0.70123457 0.6977887 0.69478908 0.6953317
|
|
0.70379747 0.70223325 0.7075 0.69876543]
|
|
|
|
mean value: 0.701441049101073
|
|
|
|
MCC on Blind test: 0.54
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01034069 0.01027012 0.01034474 0.01033282 0.01027346 0.01032972
|
|
0.01036072 0.01040125 0.01170158 0.01142716]
|
|
|
|
mean value: 0.010578227043151856
|
|
|
|
key: score_time
|
|
value: [0.00883722 0.00874519 0.00880337 0.00874853 0.0088141 0.00879169
|
|
0.00878572 0.00884461 0.00896287 0.00961447]
|
|
|
|
mean value: 0.008894777297973633
|
|
|
|
key: test_mcc
|
|
value: [0.53159579 0.58770161 0.71471774 0.33366935 0.61895161 0.65120968
|
|
0.52371369 0.71443023 0.5253647 0.42986904]
|
|
|
|
mean value: 0.5631223446835704
|
|
|
|
key: train_mcc
|
|
value: [0.61293024 0.61595667 0.59477519 0.61904643 0.59800129 0.62547933
|
|
0.56991191 0.65539873 0.60866699 0.60850577]
|
|
|
|
mean value: 0.6108672558866699
|
|
|
|
key: test_accuracy
|
|
value: [0.76190476 0.79365079 0.85714286 0.66666667 0.80952381 0.82539683
|
|
0.76190476 0.85714286 0.76190476 0.71428571]
|
|
|
|
mean value: 0.780952380952381
|
|
|
|
key: train_accuracy
|
|
value: [0.80599647 0.80776014 0.79717813 0.80952381 0.7989418 0.81128748
|
|
0.78483245 0.82716049 0.8042328 0.8042328 ]
|
|
|
|
mean value: 0.8051146384479717
|
|
|
|
key: test_fscore
|
|
value: [0.7761194 0.79365079 0.85714286 0.66666667 0.80645161 0.82539683
|
|
0.76923077 0.86153846 0.7761194 0.70967742]
|
|
|
|
mean value: 0.7841994211854587
|
|
|
|
key: train_fscore
|
|
value: [0.81164384 0.81174439 0.80138169 0.80985915 0.80139373 0.81956155
|
|
0.78745645 0.83161512 0.80628272 0.80492091]
|
|
|
|
mean value: 0.8085859552314767
|
|
|
|
key: test_precision
|
|
value: [0.72222222 0.78125 0.84375 0.65625 0.80645161 0.83870968
|
|
0.75757576 0.84848485 0.74285714 0.73333333]
|
|
|
|
mean value: 0.7730884594795885
|
|
|
|
key: train_precision
|
|
value: [0.79 0.79661017 0.78644068 0.80985915 0.79310345 0.78387097
|
|
0.7766323 0.80936455 0.79655172 0.8006993 ]
|
|
|
|
mean value: 0.7943132294142715
|
|
|
|
key: test_recall
|
|
value: [0.83870968 0.80645161 0.87096774 0.67741935 0.80645161 0.8125
|
|
0.78125 0.875 0.8125 0.6875 ]
|
|
|
|
mean value: 0.796875
|
|
|
|
key: train_recall
|
|
value: [0.83450704 0.82746479 0.81690141 0.80985915 0.80985915 0.85865724
|
|
0.79858657 0.85512367 0.81625442 0.80918728]
|
|
|
|
mean value: 0.8236400736574927
|
|
|
|
key: test_roc_auc
|
|
value: [0.76310484 0.79385081 0.85735887 0.66683468 0.80947581 0.82560484
|
|
0.76159274 0.85685484 0.76108871 0.71471774]
|
|
|
|
mean value: 0.7810483870967742
|
|
|
|
key: train_roc_auc
|
|
value: [0.8059461 0.80772533 0.79714328 0.80952322 0.79892251 0.81137088
|
|
0.78485667 0.82720972 0.80425397 0.80424153]
|
|
|
|
mean value: 0.8051193201612502
|
|
|
|
key: test_jcc
|
|
value: [0.63414634 0.65789474 0.75 0.5 0.67567568 0.7027027
|
|
0.625 0.75675676 0.63414634 0.55 ]
|
|
|
|
mean value: 0.6486322554904069
|
|
|
|
key: train_jcc
|
|
value: [0.68299712 0.68313953 0.6685879 0.68047337 0.66860465 0.69428571
|
|
0.64942529 0.71176471 0.6754386 0.67352941]
|
|
|
|
mean value: 0.6788246289017121
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01087141 0.01106834 0.01108122 0.01094699 0.01099539 0.01078033
|
|
0.01077986 0.01092768 0.01071858 0.01059294]
|
|
|
|
mean value: 0.010876274108886719
|
|
|
|
key: score_time
|
|
value: [0.01619625 0.01389289 0.0140512 0.01515293 0.01512074 0.01357436
|
|
0.01328564 0.01256061 0.01284027 0.01275682]
|
|
|
|
mean value: 0.013943171501159668
|
|
|
|
key: test_mcc
|
|
value: [0.88034084 0.74634526 0.77211 0.79833297 0.82507166 0.77042092
|
|
0.82408564 0.82408564 0.85168687 0.79701677]
|
|
|
|
mean value: 0.8089496574588511
|
|
|
|
key: train_mcc
|
|
value: [0.85512181 0.85512181 0.85203313 0.85512181 0.86755413 0.85828944
|
|
0.83375562 0.86449937 0.84903301 0.86449937]
|
|
|
|
mean value: 0.8555029486940321
|
|
|
|
key: test_accuracy
|
|
value: [0.93650794 0.85714286 0.87301587 0.88888889 0.9047619 0.87301587
|
|
0.9047619 0.9047619 0.92063492 0.88888889]
|
|
|
|
mean value: 0.8952380952380952
|
|
|
|
key: train_accuracy
|
|
value: [0.92239859 0.92239859 0.92063492 0.92239859 0.92945326 0.92416226
|
|
0.91005291 0.92768959 0.91887125 0.92768959]
|
|
|
|
mean value: 0.9225749559082892
|
|
|
|
key: test_fscore
|
|
value: [0.93939394 0.87323944 0.88571429 0.89855072 0.91176471 0.88888889
|
|
0.91428571 0.91428571 0.92753623 0.90140845]
|
|
|
|
mean value: 0.9055068092296579
|
|
|
|
key: train_fscore
|
|
value: [0.92810458 0.92810458 0.92659054 0.92810458 0.93421053 0.92939245
|
|
0.91734198 0.9324547 0.9248366 0.9324547 ]
|
|
|
|
mean value: 0.9281595205837426
|
|
|
|
key: test_precision
|
|
value: [0.88571429 0.775 0.79487179 0.81578947 0.83783784 0.8
|
|
0.84210526 0.84210526 0.86486486 0.82051282]
|
|
|
|
mean value: 0.8278801603801603
|
|
|
|
key: train_precision
|
|
value: [0.86585366 0.86585366 0.86322188 0.86585366 0.87654321 0.86809816
|
|
0.84730539 0.87345679 0.86018237 0.87345679]
|
|
|
|
mean value: 0.8659825569783122
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.859375 0.875 0.890625 0.90625 0.87096774
|
|
0.90322581 0.90322581 0.91935484 0.88709677]
|
|
|
|
mean value: 0.8952620967741935
|
|
|
|
key: train_roc_auc
|
|
value: [0.92226148 0.92226148 0.9204947 0.92226148 0.92932862 0.92429577
|
|
0.91021127 0.9278169 0.91901408 0.9278169 ]
|
|
|
|
mean value: 0.9225762703429055
|
|
|
|
key: test_jcc
|
|
value: [0.88571429 0.775 0.79487179 0.81578947 0.83783784 0.8
|
|
0.84210526 0.84210526 0.86486486 0.82051282]
|
|
|
|
mean value: 0.8278801603801603
|
|
|
|
key: train_jcc
|
|
value: [0.86585366 0.86585366 0.86322188 0.86585366 0.87654321 0.86809816
|
|
0.84730539 0.87345679 0.86018237 0.87345679]
|
|
|
|
mean value: 0.8659825569783122
|
|
|
|
MCC on Blind test: 0.18
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02843356 0.02713656 0.02664638 0.02658343 0.0270741 0.02698016
|
|
0.02736759 0.0260179 0.02724957 0.02557659]
|
|
|
|
mean value: 0.026906585693359374
|
|
|
|
key: score_time
|
|
value: [0.01400304 0.01314855 0.01354074 0.01311946 0.01355529 0.013134
|
|
0.01298809 0.01305914 0.01369715 0.01349735]
|
|
|
|
mean value: 0.01337428092956543
|
|
|
|
key: test_mcc
|
|
value: [0.85238636 0.79833297 0.77211 0.82507166 0.85238636 0.87988269
|
|
0.90873893 0.90873893 1. 0.85168687]
|
|
|
|
mean value: 0.8649334768804765
|
|
|
|
key: train_mcc
|
|
value: [0.91862969 0.90564657 0.90887831 0.9218992 0.91862969 0.91539508
|
|
0.91865308 0.90245767 0.89924646 0.91865308]
|
|
|
|
mean value: 0.9128088814940158
|
|
|
|
key: test_accuracy
|
|
value: [0.92063492 0.88888889 0.87301587 0.9047619 0.92063492 0.93650794
|
|
0.95238095 0.95238095 1. 0.92063492]
|
|
|
|
mean value: 0.9269841269841269
|
|
|
|
key: train_accuracy
|
|
value: [0.95767196 0.95061728 0.95238095 0.95943563 0.95767196 0.95590829
|
|
0.95767196 0.94885362 0.94708995 0.95767196]
|
|
|
|
mean value: 0.9544973544973545
|
|
|
|
key: test_fscore
|
|
value: [0.92537313 0.89855072 0.88571429 0.91176471 0.92537313 0.94117647
|
|
0.95522388 0.95522388 1. 0.92753623]
|
|
|
|
mean value: 0.9325936448557359
|
|
|
|
key: train_fscore
|
|
value: [0.95945946 0.95302013 0.95462185 0.96108291 0.95945946 0.95769882
|
|
0.95932203 0.9512605 0.94966443 0.95932203]
|
|
|
|
mean value: 0.956491162930342
|
|
|
|
key: test_precision
|
|
value: [0.86111111 0.81578947 0.79487179 0.83783784 0.86111111 0.88888889
|
|
0.91428571 0.91428571 1. 0.86486486]
|
|
|
|
mean value: 0.8753046510941248
|
|
|
|
key: train_precision
|
|
value: [0.92207792 0.91025641 0.91318328 0.92508143 0.92207792 0.91883117
|
|
0.9218241 0.90705128 0.90415335 0.9218241 ]
|
|
|
|
mean value: 0.9166360981363869
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.921875 0.890625 0.875 0.90625 0.921875 0.93548387
|
|
0.9516129 0.9516129 1. 0.91935484]
|
|
|
|
mean value: 0.9273689516129032
|
|
|
|
key: train_roc_auc
|
|
value: [0.95759717 0.95053004 0.95229682 0.95936396 0.95759717 0.95598592
|
|
0.95774648 0.94894366 0.9471831 0.95774648]
|
|
|
|
mean value: 0.9544990792813417
|
|
|
|
key: test_jcc
|
|
value: [0.86111111 0.81578947 0.79487179 0.83783784 0.86111111 0.88888889
|
|
0.91428571 0.91428571 1. 0.86486486]
|
|
|
|
mean value: 0.8753046510941248
|
|
|
|
key: train_jcc
|
|
value: [0.92207792 0.91025641 0.91318328 0.92508143 0.92207792 0.91883117
|
|
0.9218241 0.90705128 0.90415335 0.9218241 ]
|
|
|
|
mean value: 0.9166360981363869
|
|
|
|
MCC on Blind test: 0.26
|
|
|
|
Accuracy on Blind test: 0.53
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [2.32024193 2.09809661 2.0483706 2.03898191 2.18624377 2.14060736
|
|
2.2699976 2.24575901 2.09141064 2.43969512]
|
|
|
|
mean value: 2.1879404544830323
|
|
|
|
key: score_time
|
|
value: [0.01304626 0.01440263 0.02195883 0.0230093 0.01489925 0.01890039
|
|
0.01919031 0.01925516 0.01902366 0.02326107]
|
|
|
|
mean value: 0.018694686889648437
|
|
|
|
key: test_mcc
|
|
value: [0.93844649 0.85238636 0.88034084 0.88034084 0.85238636 0.96871896
|
|
0.96871896 0.87988269 1. 0.87988269]
|
|
|
|
mean value: 0.9101104189243239
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 0.99647883 0.99647883 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9992957659011316
|
|
|
|
key: test_accuracy
|
|
value: [0.96825397 0.92063492 0.93650794 0.93650794 0.92063492 0.98412698
|
|
0.98412698 0.93650794 1. 0.93650794]
|
|
|
|
mean value: 0.9523809523809523
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 0.99823633 0.99823633 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.999647266313933
|
|
|
|
key: test_fscore
|
|
value: [0.96875 0.92537313 0.93939394 0.93939394 0.92537313 0.98461538
|
|
0.98461538 0.94117647 1. 0.94117647]
|
|
|
|
mean value: 0.9549867857851835
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 0.99824253 0.99824253 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9996485061511423
|
|
|
|
key: test_precision
|
|
value: [0.93939394 0.86111111 0.88571429 0.88571429 0.86111111 0.96969697
|
|
0.96969697 0.88888889 1. 0.88888889]
|
|
|
|
mean value: 0.9150216450216451
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 0.99649123 0.99649123 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9992982456140351
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96875 0.921875 0.9375 0.9375 0.921875 0.98387097
|
|
0.98387097 0.93548387 1. 0.93548387]
|
|
|
|
mean value: 0.9526209677419355
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 0.99823322 0.99823322 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9996466431095407
|
|
|
|
key: test_jcc
|
|
value: [0.93939394 0.86111111 0.88571429 0.88571429 0.86111111 0.96969697
|
|
0.96969697 0.88888889 1. 0.88888889]
|
|
|
|
mean value: 0.9150216450216451
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 0.99649123 0.99649123 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9992982456140351
|
|
|
|
MCC on Blind test: 0.2
|
|
|
|
Accuracy on Blind test: 0.45
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04148316 0.02694058 0.02874565 0.02711225 0.02842116 0.02762032
|
|
0.02688146 0.02726865 0.02711296 0.0314374 ]
|
|
|
|
mean value: 0.029302358627319336
|
|
|
|
key: score_time
|
|
value: [0.0092442 0.00875235 0.00871134 0.0087955 0.00884104 0.00888085
|
|
0.0088613 0.00879478 0.00890255 0.00894046]
|
|
|
|
mean value: 0.008872437477111816
|
|
|
|
key: test_mcc
|
|
value: [0.90900317 0.96875 0.88034084 0.90900317 0.90900317 0.90873893
|
|
0.93832585 0.93832585 0.87988269 0.85168687]
|
|
|
|
mean value: 0.9093060562650621
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.95238095 0.98412698 0.93650794 0.95238095 0.95238095 0.95238095
|
|
0.96825397 0.96825397 0.93650794 0.92063492]
|
|
|
|
mean value: 0.9523809523809523
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95384615 0.98412698 0.93939394 0.95384615 0.95384615 0.95522388
|
|
0.96969697 0.96969697 0.94117647 0.92753623]
|
|
|
|
mean value: 0.9548389907522633
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.91176471 0.96875 0.88571429 0.91176471 0.91176471 0.91428571
|
|
0.94117647 0.94117647 0.88888889 0.86486486]
|
|
|
|
mean value: 0.9140150812577283
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.953125 0.984375 0.9375 0.953125 0.953125 0.9516129
|
|
0.96774194 0.96774194 0.93548387 0.91935484]
|
|
|
|
mean value: 0.9523185483870968
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.91176471 0.96875 0.88571429 0.91176471 0.91176471 0.91428571
|
|
0.94117647 0.94117647 0.88888889 0.86486486]
|
|
|
|
mean value: 0.9140150812577283
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.43
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.11696148 0.11729455 0.11611867 0.12381434 0.1320076 0.13120222
|
|
0.13175988 0.12507033 0.13208199 0.13061333]
|
|
|
|
mean value: 0.12569243907928468
|
|
|
|
key: score_time
|
|
value: [0.01753497 0.01784205 0.01767993 0.01932716 0.01953268 0.01946616
|
|
0.01910591 0.01864123 0.01937103 0.01943469]
|
|
|
|
mean value: 0.018793582916259766
|
|
|
|
key: test_mcc
|
|
value: [1. 0.96875 0.93844649 0.93844649 1. 1.
|
|
0.96871896 0.96871896 1. 0.90873893]
|
|
|
|
mean value: 0.9691819837405106
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.98412698 0.96825397 0.96825397 1. 1.
|
|
0.98412698 0.98412698 1. 0.95238095]
|
|
|
|
mean value: 0.9841269841269841
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.98412698 0.96875 0.96875 1. 1.
|
|
0.98461538 0.98461538 1. 0.95522388]
|
|
|
|
mean value: 0.9846081633954769
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.96875 0.93939394 0.93939394 1. 1.
|
|
0.96969697 0.96969697 1. 0.91428571]
|
|
|
|
mean value: 0.9701217532467532
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.984375 0.96875 0.96875 1. 1.
|
|
0.98387097 0.98387097 1. 0.9516129 ]
|
|
|
|
mean value: 0.9841229838709677
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.96875 0.93939394 0.93939394 1. 1.
|
|
0.96969697 0.96969697 1. 0.91428571]
|
|
|
|
mean value: 0.9701217532467532
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.38
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01158023 0.01160431 0.01181746 0.01173711 0.0114677 0.01171851
|
|
0.0117538 0.01154327 0.01193142 0.011415 ]
|
|
|
|
mean value: 0.011656880378723145
|
|
|
|
key: score_time
|
|
value: [0.00983739 0.00964832 0.00959182 0.00965619 0.00953746 0.00949979
|
|
0.00954795 0.00951457 0.00905633 0.00963211]
|
|
|
|
mean value: 0.009552192687988282
|
|
|
|
key: test_mcc
|
|
value: [0.93844649 0.88034084 0.85238636 0.90900317 0.82507166 0.96871896
|
|
0.96871896 0.90873893 0.90873893 0.90873893]
|
|
|
|
mean value: 0.9068903244587634
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.96825397 0.93650794 0.92063492 0.95238095 0.9047619 0.98412698
|
|
0.98412698 0.95238095 0.95238095 0.95238095]
|
|
|
|
mean value: 0.9507936507936507
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.96875 0.93939394 0.92537313 0.95384615 0.91176471 0.98461538
|
|
0.98461538 0.95522388 0.95522388 0.95522388]
|
|
|
|
mean value: 0.9534030344472619
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.93939394 0.88571429 0.86111111 0.91176471 0.83783784 0.96969697
|
|
0.96969697 0.91428571 0.91428571 0.91428571]
|
|
|
|
mean value: 0.9118072962190609
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96875 0.9375 0.921875 0.953125 0.90625 0.98387097
|
|
0.98387097 0.9516129 0.9516129 0.9516129 ]
|
|
|
|
mean value: 0.951008064516129
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.93939394 0.88571429 0.86111111 0.91176471 0.83783784 0.96969697
|
|
0.96969697 0.91428571 0.91428571 0.91428571]
|
|
|
|
mean value: 0.9118072962190609
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.42
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.86254716 1.80715823 1.71286321 1.70956802 1.82871461 1.76924825
|
|
1.7183063 1.75818324 1.74341345 1.74268937]
|
|
|
|
mean value: 1.7652691841125487
|
|
|
|
key: score_time
|
|
value: [0.10140896 0.09295535 0.09951162 0.10066724 0.10155296 0.09301758
|
|
0.14762664 0.09275389 0.10112715 0.09228444]
|
|
|
|
mean value: 0.10229058265686035
|
|
|
|
key: test_mcc
|
|
value: [1. 0.96875 0.88034084 0.90900317 0.96875 0.93832585
|
|
0.96871896 0.96871896 0.96871896 0.90873893]
|
|
|
|
mean value: 0.9480065680788993
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.98412698 0.93650794 0.95238095 0.98412698 0.96825397
|
|
0.98412698 0.98412698 0.98412698 0.95238095]
|
|
|
|
mean value: 0.973015873015873
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.98412698 0.93939394 0.95384615 0.98412698 0.96969697
|
|
0.98461538 0.98461538 0.98461538 0.95522388]
|
|
|
|
mean value: 0.97402610656342
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.96875 0.88571429 0.91176471 0.96875 0.94117647
|
|
0.96969697 0.96969697 0.96969697 0.91428571]
|
|
|
|
mean value: 0.9499532085561497
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.984375 0.9375 0.953125 0.984375 0.96774194
|
|
0.98387097 0.98387097 0.98387097 0.9516129 ]
|
|
|
|
mean value: 0.9730342741935484
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.96875 0.88571429 0.91176471 0.96875 0.94117647
|
|
0.96969697 0.96969697 0.96969697 0.91428571]
|
|
|
|
mean value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
0.9499532085561497
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.37
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.96623254 0.98398232 0.9792006 0.9972949 0.95658612 0.97668839
|
|
0.9849627 0.96769643 1.01467943 1.02247071]
|
|
|
|
mean value: 0.9849794149398804
|
|
|
|
key: score_time
|
|
value: [0.24952841 0.23086262 0.21296692 0.27785516 0.22465849 0.28579545
|
|
0.12745547 0.19762826 0.27785087 0.26674247]
|
|
|
|
mean value: 0.23513441085815429
|
|
|
|
key: test_mcc
|
|
value: [0.96875 0.85238636 0.88034084 0.88034084 0.90900317 0.93832585
|
|
0.96871896 0.93832585 0.96871896 0.90873893]
|
|
|
|
mean value: 0.9213649774372641
|
|
|
|
key: train_mcc
|
|
value: [0.97560496 0.972167 0.96874043 0.972167 0.96532513 0.97560706
|
|
0.96532937 0.97216973 0.96874387 0.97216973]
|
|
|
|
mean value: 0.9708024262129162
|
|
|
|
key: test_accuracy
|
|
value: [0.98412698 0.92063492 0.93650794 0.93650794 0.95238095 0.96825397
|
|
0.98412698 0.96825397 0.98412698 0.95238095]
|
|
|
|
mean value: 0.9587301587301587
|
|
|
|
key: train_accuracy
|
|
value: [0.98765432 0.98589065 0.98412698 0.98589065 0.98236332 0.98765432
|
|
0.98236332 0.98589065 0.98412698 0.98589065]
|
|
|
|
mean value: 0.9851851851851852
|
|
|
|
key: test_fscore
|
|
value: [0.98412698 0.92537313 0.93939394 0.93939394 0.95384615 0.96969697
|
|
0.98461538 0.96969697 0.98461538 0.95522388]
|
|
|
|
mean value: 0.9605982740311099
|
|
|
|
key: train_fscore
|
|
value: [0.98782609 0.98611111 0.98440208 0.98611111 0.98269896 0.9877836
|
|
0.98263889 0.98606272 0.98434783 0.98606272]
|
|
|
|
mean value: 0.9854045096468517
|
|
|
|
key: test_precision
|
|
value: [0.96875 0.86111111 0.88571429 0.88571429 0.91176471 0.94117647
|
|
0.96969697 0.94117647 0.96969697 0.91428571]
|
|
|
|
mean value: 0.924908698327816
|
|
|
|
key: train_precision
|
|
value: [0.97594502 0.97260274 0.96928328 0.97260274 0.96598639 0.97586207
|
|
0.96587031 0.97250859 0.96917808 0.97250859]
|
|
|
|
mean value: 0.9712347808097638
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.984375 0.921875 0.9375 0.9375 0.953125 0.96774194
|
|
0.98387097 0.96774194 0.98387097 0.9516129 ]
|
|
|
|
mean value: 0.958921370967742
|
|
|
|
key: train_roc_auc
|
|
value: [0.98763251 0.98586572 0.98409894 0.98586572 0.98233216 0.98767606
|
|
0.98239437 0.98591549 0.98415493 0.98591549]
|
|
|
|
mean value: 0.9851851391031703
|
|
|
|
key: test_jcc
|
|
value: [0.96875 0.86111111 0.88571429 0.88571429 0.91176471 0.94117647
|
|
0.96969697 0.94117647 0.96969697 0.91428571]
|
|
|
|
mean value: 0.924908698327816
|
|
|
|
key: train_jcc
|
|
value: [0.97594502 0.97260274 0.96928328 0.97260274 0.96598639 0.97586207
|
|
0.96587031 0.97250859 0.96917808 0.97250859]
|
|
|
|
mean value: 0.9712347808097638
|
|
|
|
MCC on Blind test: 0.15
|
|
|
|
Accuracy on Blind test: 0.42
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02457523 0.01037908 0.01053691 0.01046062 0.01127911 0.01047325
|
|
0.01143527 0.01095819 0.01064515 0.01041675]
|
|
|
|
mean value: 0.012115955352783203
|
|
|
|
key: score_time
|
|
value: [0.01191616 0.008986 0.00890946 0.00887513 0.00888062 0.00947309
|
|
0.00890756 0.01004696 0.0090971 0.00889754]
|
|
|
|
mean value: 0.009398961067199707
|
|
|
|
key: test_mcc
|
|
value: [0.53159579 0.58770161 0.71471774 0.33366935 0.61895161 0.65120968
|
|
0.52371369 0.71443023 0.5253647 0.42986904]
|
|
|
|
mean value: 0.5631223446835704
|
|
|
|
key: train_mcc
|
|
value: [0.61293024 0.61595667 0.59477519 0.61904643 0.59800129 0.62547933
|
|
0.56991191 0.65539873 0.60866699 0.60850577]
|
|
|
|
mean value: 0.6108672558866699
|
|
|
|
key: test_accuracy
|
|
value: [0.76190476 0.79365079 0.85714286 0.66666667 0.80952381 0.82539683
|
|
0.76190476 0.85714286 0.76190476 0.71428571]
|
|
|
|
mean value: 0.780952380952381
|
|
|
|
key: train_accuracy
|
|
value: [0.80599647 0.80776014 0.79717813 0.80952381 0.7989418 0.81128748
|
|
0.78483245 0.82716049 0.8042328 0.8042328 ]
|
|
|
|
mean value: 0.8051146384479717
|
|
|
|
key: test_fscore
|
|
value: [0.7761194 0.79365079 0.85714286 0.66666667 0.80645161 0.82539683
|
|
0.76923077 0.86153846 0.7761194 0.70967742]
|
|
|
|
mean value: 0.7841994211854587
|
|
|
|
key: train_fscore
|
|
value: [0.81164384 0.81174439 0.80138169 0.80985915 0.80139373 0.81956155
|
|
0.78745645 0.83161512 0.80628272 0.80492091]
|
|
|
|
mean value: 0.8085859552314767
|
|
|
|
key: test_precision
|
|
value: [0.72222222 0.78125 0.84375 0.65625 0.80645161 0.83870968
|
|
0.75757576 0.84848485 0.74285714 0.73333333]
|
|
|
|
mean value: 0.7730884594795885
|
|
|
|
key: train_precision
|
|
value: [0.79 0.79661017 0.78644068 0.80985915 0.79310345 0.78387097
|
|
0.7766323 0.80936455 0.79655172 0.8006993 ]
|
|
|
|
mean value: 0.7943132294142715
|
|
|
|
key: test_recall
|
|
value: [0.83870968 0.80645161 0.87096774 0.67741935 0.80645161 0.8125
|
|
0.78125 0.875 0.8125 0.6875 ]
|
|
|
|
mean value: 0.796875
|
|
|
|
key: train_recall
|
|
value: [0.83450704 0.82746479 0.81690141 0.80985915 0.80985915 0.85865724
|
|
0.79858657 0.85512367 0.81625442 0.80918728]
|
|
|
|
mean value: 0.8236400736574927
|
|
|
|
key: test_roc_auc
|
|
value: [0.76310484 0.79385081 0.85735887 0.66683468 0.80947581 0.82560484
|
|
0.76159274 0.85685484 0.76108871 0.71471774]
|
|
|
|
mean value: 0.7810483870967742
|
|
|
|
key: train_roc_auc
|
|
value: [0.8059461 0.80772533 0.79714328 0.80952322 0.79892251 0.81137088
|
|
0.78485667 0.82720972 0.80425397 0.80424153]
|
|
|
|
mean value: 0.8051193201612502
|
|
|
|
key: test_jcc
|
|
value: [0.63414634 0.65789474 0.75 0.5 0.67567568 0.7027027
|
|
0.625 0.75675676 0.63414634 0.55 ]
|
|
|
|
mean value: 0.6486322554904069
|
|
|
|
key: train_jcc
|
|
value: [0.68299712 0.68313953 0.6685879 0.68047337 0.66860465 0.69428571
|
|
0.64942529 0.71176471 0.6754386 0.67352941]
|
|
|
|
mean value: 0.6788246289017121
|
|
|
|
MCC on Blind test: 0.32
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.11746836 0.22087693 0.08631468 0.09118629 0.08772492 0.09107208
|
|
0.08957362 0.09848595 0.08927178 0.09097743]
|
|
|
|
mean value: 0.10629520416259766
|
|
|
|
key: score_time
|
|
value: [0.01111054 0.01142621 0.01103854 0.0110023 0.0110724 0.0111289
|
|
0.01107526 0.01140261 0.0110724 0.01106167]
|
|
|
|
mean value: 0.01113908290863037
|
|
|
|
key: test_mcc
|
|
value: [1. 0.90900317 0.88034084 0.88034084 0.96875 0.90873893
|
|
0.96871896 0.93832585 1. 0.90873893]
|
|
|
|
mean value: 0.9362957539881316
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.95238095 0.93650794 0.93650794 0.98412698 0.95238095
|
|
0.98412698 0.96825397 1. 0.95238095]
|
|
|
|
mean value: 0.9666666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.95384615 0.93939394 0.93939394 0.98412698 0.95522388
|
|
0.98461538 0.96969697 1. 0.95522388]
|
|
|
|
mean value: 0.9681521132267401
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.91176471 0.88571429 0.88571429 0.96875 0.91428571
|
|
0.96969697 0.94117647 1. 0.91428571]
|
|
|
|
mean value: 0.9391388146167557
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.953125 0.9375 0.9375 0.984375 0.9516129
|
|
0.98387097 0.96774194 1. 0.9516129 ]
|
|
|
|
mean value: 0.9667338709677419
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.91176471 0.88571429 0.88571429 0.96875 0.91428571
|
|
0.96969697 0.94117647 1. 0.91428571]
|
|
|
|
mean value: 0.9391388146167557
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.4
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.05076885 0.07648253 0.07648492 0.06601882 0.04623508 0.09058738
|
|
0.06839609 0.08257461 0.06715894 0.06943917]
|
|
|
|
mean value: 0.06941463947296142
|
|
|
|
key: score_time
|
|
value: [0.01933002 0.01212764 0.0143199 0.01220059 0.01220202 0.02033758
|
|
0.01245904 0.01808405 0.01322579 0.02331853]
|
|
|
|
mean value: 0.015760517120361327
|
|
|
|
key: test_mcc
|
|
value: [0.72098341 0.69597055 0.88034084 0.79833297 0.82507166 0.82408564
|
|
0.82408564 0.82408564 0.87988269 0.74424094]
|
|
|
|
mean value: 0.8017079978648479
|
|
|
|
key: train_mcc
|
|
value: [0.89921054 0.89921054 0.90242401 0.90887831 0.90564657 0.90567804
|
|
0.89924646 0.89924646 0.88966677 0.90567804]
|
|
|
|
mean value: 0.9014885730410923
|
|
|
|
key: test_accuracy
|
|
value: [0.84126984 0.82539683 0.93650794 0.88888889 0.9047619 0.9047619
|
|
0.9047619 0.9047619 0.93650794 0.85714286]
|
|
|
|
mean value: 0.8904761904761904
|
|
|
|
key: train_accuracy
|
|
value: [0.94708995 0.94708995 0.94885362 0.95238095 0.95061728 0.95061728
|
|
0.94708995 0.94708995 0.94179894 0.95061728]
|
|
|
|
mean value: 0.9483245149911816
|
|
|
|
key: test_fscore
|
|
value: [0.86111111 0.84931507 0.93939394 0.89855072 0.91176471 0.91428571
|
|
0.91428571 0.91428571 0.94117647 0.87671233]
|
|
|
|
mean value: 0.9020881491730737
|
|
|
|
key: train_fscore
|
|
value: [0.94983278 0.94983278 0.95142379 0.95462185 0.95302013 0.95286195
|
|
0.94966443 0.94966443 0.94490818 0.95286195]
|
|
|
|
mean value: 0.9508692265486598
|
|
|
|
key: test_precision
|
|
value: [0.75609756 0.73809524 0.88571429 0.81578947 0.83783784 0.84210526
|
|
0.84210526 0.84210526 0.88888889 0.7804878 ]
|
|
|
|
mean value: 0.8229226879547804
|
|
|
|
key: train_precision
|
|
value: [0.9044586 0.9044586 0.90734824 0.91318328 0.91025641 0.90996785
|
|
0.90415335 0.90415335 0.89556962 0.90996785]
|
|
|
|
mean value: 0.9063517151099575
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.84375 0.828125 0.9375 0.890625 0.90625 0.90322581
|
|
0.90322581 0.90322581 0.93548387 0.85483871]
|
|
|
|
mean value: 0.890625
|
|
|
|
key: train_roc_auc
|
|
value: [0.94699647 0.94699647 0.94876325 0.95229682 0.95053004 0.95070423
|
|
0.9471831 0.9471831 0.94190141 0.95070423]
|
|
|
|
mean value: 0.9483259095207286
|
|
|
|
key: test_jcc
|
|
value: [0.75609756 0.73809524 0.88571429 0.81578947 0.83783784 0.84210526
|
|
0.84210526 0.84210526 0.88888889 0.7804878 ]
|
|
|
|
mean value: 0.8229226879547804
|
|
|
|
key: train_jcc
|
|
value: [0.9044586 0.9044586 0.90734824 0.91318328 0.91025641 0.90996785
|
|
0.90415335 0.90415335 0.89556962 0.90996785]
|
|
|
|
mean value: 0.9063517151099575
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.53
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01622725 0.01096416 0.01015782 0.0102284 0.0104022 0.01073599
|
|
0.01099825 0.01049161 0.0104351 0.0104022 ]
|
|
|
|
mean value: 0.011104297637939454
|
|
|
|
key: score_time
|
|
value: [0.01244473 0.00929332 0.00884295 0.00865149 0.00919294 0.00885487
|
|
0.00966406 0.0095582 0.00905943 0.00956059]
|
|
|
|
mean value: 0.00951225757598877
|
|
|
|
key: test_mcc
|
|
value: [0.62861856 0.6712536 0.74634526 0.52679717 0.68865372 0.70447456
|
|
0.52928314 0.64257546 0.69290694 0.64257546]
|
|
|
|
mean value: 0.6473483880925772
|
|
|
|
key: train_mcc
|
|
value: [0.69231888 0.67911224 0.67911224 0.59184023 0.6812766 0.68683388
|
|
0.70289211 0.69980969 0.67653841 0.69688756]
|
|
|
|
mean value: 0.6786621831381685
|
|
|
|
key: test_accuracy
|
|
value: [0.79365079 0.80952381 0.85714286 0.76190476 0.84126984 0.84126984
|
|
0.74603175 0.79365079 0.82539683 0.79365079]
|
|
|
|
mean value: 0.8063492063492064
|
|
|
|
key: train_accuracy
|
|
value: [0.83068783 0.82363316 0.82363316 0.78835979 0.82186949 0.82716049
|
|
0.83597884 0.83597884 0.82186949 0.83421517]
|
|
|
|
mean value: 0.8243386243386244
|
|
|
|
key: test_fscore
|
|
value: [0.82191781 0.83783784 0.87323944 0.76923077 0.84848485 0.86111111
|
|
0.78947368 0.83116883 0.85333333 0.83116883]
|
|
|
|
mean value: 0.8316966491384985
|
|
|
|
key: train_fscore
|
|
value: [0.85276074 0.8470948 0.8470948 0.81012658 0.84720121 0.84969325
|
|
0.85670262 0.85581395 0.84532925 0.85448916]
|
|
|
|
mean value: 0.8466306369349164
|
|
|
|
key: test_precision
|
|
value: [0.71428571 0.72093023 0.775 0.73529412 0.8 0.775
|
|
0.68181818 0.71111111 0.74418605 0.71111111]
|
|
|
|
mean value: 0.7368736515042945
|
|
|
|
key: train_precision
|
|
value: [0.75543478 0.74864865 0.74864865 0.73563218 0.74270557 0.75067751
|
|
0.75956284 0.76243094 0.74594595 0.76033058]
|
|
|
|
mean value: 0.75100176460958
|
|
|
|
key: test_recall
|
|
value: [0.96774194 1. 1. 0.80645161 0.90322581 0.96875
|
|
0.9375 1. 1. 1. ]
|
|
|
|
mean value: 0.958366935483871
|
|
|
|
key: train_recall
|
|
value: [0.97887324 0.97535211 0.97535211 0.90140845 0.98591549 0.97879859
|
|
0.98233216 0.97526502 0.97526502 0.97526502]
|
|
|
|
mean value: 0.9703827203503708
|
|
|
|
key: test_roc_auc
|
|
value: [0.79637097 0.8125 0.859375 0.76260081 0.8422379 0.83921371
|
|
0.74294355 0.79032258 0.82258065 0.79032258]
|
|
|
|
mean value: 0.8058467741935483
|
|
|
|
key: train_roc_auc
|
|
value: [0.83042602 0.8233651 0.8233651 0.78816006 0.82157965 0.82742746
|
|
0.8362365 0.83622406 0.82213955 0.83446349]
|
|
|
|
mean value: 0.8243387000447917
|
|
|
|
key: test_jcc
|
|
value: [0.69767442 0.72093023 0.775 0.625 0.73684211 0.75609756
|
|
0.65217391 0.71111111 0.74418605 0.71111111]
|
|
|
|
mean value: 0.7130126499178887
|
|
|
|
key: train_jcc
|
|
value: [0.74331551 0.73474801 0.73474801 0.68085106 0.73490814 0.73866667
|
|
0.74932615 0.74796748 0.73209549 0.74594595]
|
|
|
|
mean value: 0.7342572458110427
|
|
|
|
MCC on Blind test: 0.41
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01729298 0.02173781 0.01869059 0.02543879 0.02392936 0.0283711
|
|
0.0240531 0.02316523 0.02329946 0.02798557]
|
|
|
|
mean value: 0.023396396636962892
|
|
|
|
key: score_time
|
|
value: [0.0104804 0.01121211 0.01202106 0.01538372 0.01211929 0.01215363
|
|
0.01207304 0.01212358 0.01208353 0.01187015]
|
|
|
|
mean value: 0.01215205192565918
|
|
|
|
key: test_mcc
|
|
value: [0.84530217 0.74634526 0.62939541 0.85238636 0.78719616 0.93649194
|
|
0.87298387 0.85168687 0.57427105 0.77800241]
|
|
|
|
mean value: 0.7874061500442916
|
|
|
|
key: train_mcc
|
|
value: [0.8959759 0.89921054 0.61731091 0.94841356 0.8365339 0.90268256
|
|
0.90640242 0.88228279 0.5298899 0.89472984]
|
|
|
|
mean value: 0.8313432321962604
|
|
|
|
key: test_accuracy
|
|
value: [0.92063492 0.85714286 0.80952381 0.92063492 0.88888889 0.96825397
|
|
0.93650794 0.92063492 0.74603175 0.88888889]
|
|
|
|
mean value: 0.8857142857142857
|
|
|
|
key: train_accuracy
|
|
value: [0.94708995 0.94708995 0.78659612 0.97354497 0.91710758 0.95061728
|
|
0.95238095 0.94003527 0.72839506 0.94708995]
|
|
|
|
mean value: 0.908994708994709
|
|
|
|
key: test_fscore
|
|
value: [0.92307692 0.87323944 0.78571429 0.92537313 0.87719298 0.96875
|
|
0.9375 0.92753623 0.66666667 0.89230769]
|
|
|
|
mean value: 0.8777357353053843
|
|
|
|
key: train_fscore
|
|
value: [0.94880546 0.94983278 0.73866091 0.97427101 0.91407678 0.95189003
|
|
0.95368782 0.94197952 0.63507109 0.94604317]
|
|
|
|
mean value: 0.8954318571930533
|
|
|
|
key: test_precision
|
|
value: [0.88235294 0.775 0.88 0.86111111 0.96153846 0.96875
|
|
0.9375 0.86486486 1. 0.87878788]
|
|
|
|
mean value: 0.9009905257478787
|
|
|
|
key: train_precision
|
|
value: [0.9205298 0.9044586 0.95530726 0.94983278 0.95057034 0.9264214
|
|
0.92666667 0.91089109 0.96402878 0.96336996]
|
|
|
|
mean value: 0.9372076681551739
|
|
|
|
key: test_recall
|
|
value: [0.96774194 1. 0.70967742 1. 0.80645161 0.96875
|
|
0.9375 1. 0.5 0.90625 ]
|
|
|
|
mean value: 0.8796370967741935
|
|
|
|
key: train_recall
|
|
value: [0.97887324 1. 0.60211268 1. 0.88028169 0.97879859
|
|
0.98233216 0.97526502 0.47349823 0.92932862]
|
|
|
|
mean value: 0.8800490220474793
|
|
|
|
key: test_roc_auc
|
|
value: [0.92137097 0.859375 0.80796371 0.921875 0.88760081 0.96824597
|
|
0.93649194 0.91935484 0.75 0.88860887]
|
|
|
|
mean value: 0.8860887096774194
|
|
|
|
key: train_roc_auc
|
|
value: [0.94703379 0.94699647 0.78692206 0.97349823 0.91717265 0.9506669
|
|
0.95243368 0.9400973 0.7279463 0.94705868]
|
|
|
|
mean value: 0.9089826058826457
|
|
|
|
key: test_jcc
|
|
value: [0.85714286 0.775 0.64705882 0.86111111 0.78125 0.93939394
|
|
0.88235294 0.86486486 0.5 0.80555556]
|
|
|
|
mean value: 0.791373009277421
|
|
|
|
key: train_jcc
|
|
value: [0.9025974 0.9044586 0.58561644 0.94983278 0.84175084 0.90819672
|
|
0.91147541 0.89032258 0.46527778 0.89761092]
|
|
|
|
mean value: 0.8257139468422443
|
|
|
|
MCC on Blind test: 0.25
|
|
|
|
Accuracy on Blind test: 0.52
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02992511 0.02500296 0.02088141 0.0188849 0.0233717 0.02267003
|
|
0.02508283 0.0256896 0.02227998 0.02116513]
|
|
|
|
mean value: 0.02349536418914795
|
|
|
|
key: score_time
|
|
value: [0.01238775 0.01213789 0.01211381 0.01553416 0.01210475 0.01209593
|
|
0.01216555 0.01208258 0.01213145 0.01204014]
|
|
|
|
mean value: 0.012479400634765625
|
|
|
|
key: test_mcc
|
|
value: [0.62249498 0.72098341 0.87487431 0.74634526 0.88034084 0.87988269
|
|
0.93832585 0.80947581 0.93832585 0.87988269]
|
|
|
|
mean value: 0.8290931696483488
|
|
|
|
key: train_mcc
|
|
value: [0.80656461 0.82456585 0.89864409 0.77705923 0.93839442 0.91539508
|
|
0.88649118 0.89472984 0.83984461 0.92192075]
|
|
|
|
mean value: 0.8703609656554041
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.84126984 0.93650794 0.85714286 0.93650794 0.93650794
|
|
0.96825397 0.9047619 0.96825397 0.93650794]
|
|
|
|
mean value: 0.9063492063492063
|
|
|
|
key: train_accuracy
|
|
value: [0.89417989 0.9047619 0.94885362 0.87654321 0.96825397 0.95590829
|
|
0.94003527 0.94708995 0.91358025 0.95943563]
|
|
|
|
mean value: 0.9308641975308641
|
|
|
|
key: test_fscore
|
|
value: [0.81578947 0.86111111 0.9375 0.87323944 0.93939394 0.94117647
|
|
0.96969697 0.90625 0.96969697 0.94117647]
|
|
|
|
mean value: 0.9155030841379389
|
|
|
|
key: train_fscore
|
|
value: [0.9044586 0.91318328 0.95008606 0.89028213 0.96928328 0.95769882
|
|
0.94333333 0.94604317 0.9203252 0.96095076]
|
|
|
|
mean value: 0.9355644626727246
|
|
|
|
key: test_precision
|
|
value: [0.68888889 0.75609756 0.90909091 0.775 0.88571429 0.88888889
|
|
0.94117647 0.90625 0.94117647 0.88888889]
|
|
|
|
mean value: 0.8581172363623941
|
|
|
|
key: train_precision
|
|
value: [0.8255814 0.84023669 0.92929293 0.80225989 0.94039735 0.91883117
|
|
0.89274448 0.96336996 0.85240964 0.9248366 ]
|
|
|
|
mean value: 0.8889960100589133
|
|
|
|
key: test_recall
|
|
value: [1. 1. 0.96774194 1. 1. 1.
|
|
1. 0.90625 1. 1. ]
|
|
|
|
mean value: 0.9873991935483871
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.97183099 1. 1. 1.
|
|
1. 0.92932862 1. 1. ]
|
|
|
|
mean value: 0.990115960782362
|
|
|
|
key: test_roc_auc
|
|
value: [0.78125 0.84375 0.93699597 0.859375 0.9375 0.93548387
|
|
0.96774194 0.9047379 0.96774194 0.93548387]
|
|
|
|
mean value: 0.9070060483870968
|
|
|
|
key: train_roc_auc
|
|
value: [0.89399293 0.90459364 0.94881302 0.87632509 0.96819788 0.95598592
|
|
0.94014085 0.94705868 0.91373239 0.95950704]
|
|
|
|
mean value: 0.9308347434429901
|
|
|
|
key: test_jcc
|
|
value: [0.68888889 0.75609756 0.88235294 0.775 0.88571429 0.88888889
|
|
0.94117647 0.82857143 0.94117647 0.88888889]
|
|
|
|
mean value: 0.8476755824280932
|
|
|
|
key: train_jcc
|
|
value: [0.8255814 0.84023669 0.90491803 0.80225989 0.94039735 0.91883117
|
|
0.89274448 0.89761092 0.85240964 0.9248366 ]
|
|
|
|
mean value: 0.8799826162214832
|
|
|
|
MCC on Blind test: 0.17
|
|
|
|
Accuracy on Blind test: 0.42
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.19694424 0.18106914 0.18215322 0.18209958 0.18385458 0.18162251
|
|
0.18296266 0.18323588 0.18307757 0.18317103]
|
|
|
|
mean value: 0.18401904106140138
|
|
|
|
key: score_time
|
|
value: [0.01575923 0.01583099 0.0157485 0.0155642 0.01576161 0.01647067
|
|
0.01629472 0.01562262 0.01549649 0.01551867]
|
|
|
|
mean value: 0.01580677032470703
|
|
|
|
key: test_mcc
|
|
value: [0.90900317 0.82507166 0.88034084 0.88034084 0.88034084 0.87988269
|
|
0.93832585 0.96871896 0.96871896 0.93832585]
|
|
|
|
mean value: 0.9069069676186285
|
|
|
|
key: train_mcc
|
|
value: [0.99647883 0.98947316 0.98947316 0.99296993 0.99296993 0.9929701
|
|
0.99647887 0.9929701 0.9859891 0.99647887]
|
|
|
|
mean value: 0.9926252060978803
|
|
|
|
key: test_accuracy
|
|
value: [0.95238095 0.9047619 0.93650794 0.93650794 0.93650794 0.93650794
|
|
0.96825397 0.98412698 0.98412698 0.96825397]
|
|
|
|
mean value: 0.9507936507936507
|
|
|
|
key: train_accuracy
|
|
value: [0.99823633 0.99470899 0.99470899 0.99647266 0.99647266 0.99647266
|
|
0.99823633 0.99647266 0.99294533 0.99823633]
|
|
|
|
mean value: 0.9962962962962962
|
|
|
|
key: test_fscore
|
|
value: [0.95384615 0.91176471 0.93939394 0.93939394 0.93939394 0.94117647
|
|
0.96969697 0.98461538 0.98461538 0.96969697]
|
|
|
|
mean value: 0.9533593857123269
|
|
|
|
key: train_fscore
|
|
value: [0.99824253 0.99474606 0.99474606 0.99649123 0.99649123 0.99647887
|
|
0.99823633 0.99647887 0.99298246 0.99823633]
|
|
|
|
mean value: 0.9963129971743934
|
|
|
|
key: test_precision
|
|
value: [0.91176471 0.83783784 0.88571429 0.88571429 0.88571429 0.88888889
|
|
0.94117647 0.96969697 0.96969697 0.94117647]
|
|
|
|
mean value: 0.9117381170322346
|
|
|
|
key: train_precision
|
|
value: [0.99649123 0.98954704 0.98954704 0.99300699 0.99300699 0.99298246
|
|
0.99647887 0.99298246 0.98606272 0.99647887]
|
|
|
|
mean value: 0.9926584667268823
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.953125 0.90625 0.9375 0.9375 0.9375 0.93548387
|
|
0.96774194 0.98387097 0.98387097 0.96774194]
|
|
|
|
mean value: 0.9510584677419355
|
|
|
|
key: train_roc_auc
|
|
value: [0.99823322 0.99469965 0.99469965 0.99646643 0.99646643 0.99647887
|
|
0.99823944 0.99647887 0.99295775 0.99823944]
|
|
|
|
mean value: 0.9962959737221918
|
|
|
|
key: test_jcc
|
|
value: [0.91176471 0.83783784 0.88571429 0.88571429 0.88571429 0.88888889
|
|
0.94117647 0.96969697 0.96969697 0.94117647]
|
|
|
|
mean value: 0.9117381170322346
|
|
|
|
key: train_jcc
|
|
value: [0.99649123 0.98954704 0.98954704 0.99300699 0.99300699 0.99298246
|
|
0.99647887 0.99298246 0.98606272 0.99647887]
|
|
|
|
mean value: 0.9926584667268823
|
|
|
|
MCC on Blind test: 0.21
|
|
|
|
Accuracy on Blind test: 0.45
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.07920384 0.08957911 0.09898543 0.08350778 0.1131115 0.09911609
|
|
0.09479165 0.10650468 0.10634279 0.10527992]
|
|
|
|
mean value: 0.09764227867126465
|
|
|
|
key: score_time
|
|
value: [0.02535343 0.02429223 0.02220082 0.03247929 0.03353858 0.040025
|
|
0.04365349 0.03761458 0.02447486 0.03668809]
|
|
|
|
mean value: 0.03203203678131104
|
|
|
|
key: test_mcc
|
|
value: [0.93844649 0.93844649 0.85238636 0.90900317 0.93844649 0.93832585
|
|
0.96871896 0.93832585 0.93832585 0.90873893]
|
|
|
|
mean value: 0.9269164458381627
|
|
|
|
key: train_mcc
|
|
value: [1. 0.99647883 0.99647883 0.99647883 1. 1.
|
|
1. 1. 0.99647887 1. ]
|
|
|
|
mean value: 0.9985915361756411
|
|
|
|
key: test_accuracy
|
|
value: [0.96825397 0.96825397 0.92063492 0.95238095 0.96825397 0.96825397
|
|
0.98412698 0.96825397 0.96825397 0.95238095]
|
|
|
|
mean value: 0.9619047619047619
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.99823633 0.99823633 0.99823633 1. 1.
|
|
1. 1. 0.99823633 1. ]
|
|
|
|
mean value: 0.9992945326278659
|
|
|
|
key: test_fscore
|
|
value: [0.96875 0.96875 0.92537313 0.95384615 0.96875 0.96969697
|
|
0.98461538 0.96969697 0.96969697 0.95522388]
|
|
|
|
mean value: 0.9634399462477821
|
|
|
|
key: train_fscore
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:747: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_bagging.py:753: RuntimeWarning: invalid value encountered in true_divide
|
|
oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
|
|
[1. 0.99824253 0.99824253 0.99824253 1. 1.
|
|
1. 1. 0.99823633 1. ]
|
|
|
|
mean value: 0.9992963923836801
|
|
|
|
key: test_precision
|
|
value: [0.93939394 0.93939394 0.86111111 0.91176471 0.93939394 0.94117647
|
|
0.96969697 0.94117647 0.94117647 0.91428571]
|
|
|
|
mean value: 0.9298569730922672
|
|
|
|
key: train_precision
|
|
value: [1. 0.99649123 0.99649123 0.99649123 1. 1.
|
|
1. 1. 0.99647887 1. ]
|
|
|
|
mean value: 0.9985952557449963
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.96875 0.96875 0.921875 0.953125 0.96875 0.96774194
|
|
0.98387097 0.96774194 0.96774194 0.9516129 ]
|
|
|
|
mean value: 0.9619959677419355
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.99823322 0.99823322 0.99823322 1. 1.
|
|
1. 1. 0.99823944 1. ]
|
|
|
|
mean value: 0.9992939083262828
|
|
|
|
key: test_jcc
|
|
value: [0.93939394 0.93939394 0.86111111 0.91176471 0.93939394 0.94117647
|
|
0.96969697 0.94117647 0.94117647 0.91428571]
|
|
|
|
mean value: 0.9298569730922672
|
|
|
|
key: train_jcc
|
|
value: [1. 0.99649123 0.99649123 0.99649123 1. 1.
|
|
1. 1. 0.99647887 1. ]
|
|
|
|
mean value: 0.9985952557449963
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.43
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.22267103 0.23485088 0.25174284 0.21041656 0.19382739 0.18452621
|
|
0.21485114 0.24440217 0.24101925 0.25608635]
|
|
|
|
mean value: 0.22543938159942628
|
|
|
|
key: score_time
|
|
value: [0.02726507 0.01614642 0.027174 0.02749252 0.01608896 0.01612878
|
|
0.02703214 0.02717924 0.02714157 0.02704191]
|
|
|
|
mean value: 0.02386906147003174
|
|
|
|
key: test_mcc
|
|
value: [0.90900317 0.85238636 0.85238636 0.82507166 0.85238636 0.93832585
|
|
0.93832585 0.90873893 1. 0.82408564]
|
|
|
|
mean value: 0.8900710184433863
|
|
|
|
key: train_mcc
|
|
value: [0.96192098 0.96874043 0.96192098 0.97560496 0.96532513 0.96532937
|
|
0.96874387 0.97216973 0.96532937 0.97216973]
|
|
|
|
mean value: 0.9677254536391215
|
|
|
|
key: test_accuracy
|
|
value: [0.95238095 0.92063492 0.92063492 0.9047619 0.92063492 0.96825397
|
|
0.96825397 0.95238095 1. 0.9047619 ]
|
|
|
|
mean value: 0.9412698412698413
|
|
|
|
key: train_accuracy
|
|
value: [0.98059965 0.98412698 0.98059965 0.98765432 0.98236332 0.98236332
|
|
0.98412698 0.98589065 0.98236332 0.98589065]
|
|
|
|
mean value: 0.9835978835978836
|
|
|
|
key: test_fscore
|
|
value: [0.95384615 0.92537313 0.92537313 0.91176471 0.92537313 0.96969697
|
|
0.96969697 0.95522388 1. 0.91428571]
|
|
|
|
mean value: 0.945063379699025
|
|
|
|
key: train_fscore
|
|
value: [0.98100173 0.98440208 0.98100173 0.98782609 0.98269896 0.98263889
|
|
0.98434783 0.98606272 0.98263889 0.98606272]
|
|
|
|
mean value: 0.9838681622253179
|
|
|
|
key: test_precision
|
|
value: [0.91176471 0.86111111 0.86111111 0.83783784 0.86111111 0.94117647
|
|
0.94117647 0.91428571 1. 0.84210526]
|
|
|
|
mean value: 0.8971679795673604
|
|
|
|
key: train_precision
|
|
value: [0.96271186 0.96928328 0.96271186 0.97594502 0.96598639 0.96587031
|
|
0.96917808 0.97250859 0.96587031 0.97250859]
|
|
|
|
mean value: 0.9682574295660861
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.953125 0.921875 0.921875 0.90625 0.921875 0.96774194
|
|
0.96774194 0.9516129 1. 0.90322581]
|
|
|
|
mean value: 0.9415322580645161
|
|
|
|
key: train_roc_auc
|
|
value: [0.98056537 0.98409894 0.98056537 0.98763251 0.98233216 0.98239437
|
|
0.98415493 0.98591549 0.98239437 0.98591549]
|
|
|
|
mean value: 0.9835968994177077
|
|
|
|
key: test_jcc
|
|
value: [0.91176471 0.86111111 0.86111111 0.83783784 0.86111111 0.94117647
|
|
0.94117647 0.91428571 1. 0.84210526]
|
|
|
|
mean value: 0.8971679795673604
|
|
|
|
key: train_jcc
|
|
value: [0.96271186 0.96928328 0.96271186 0.97594502 0.96598639 0.96587031
|
|
0.96917808 0.97250859 0.96587031 0.97250859]
|
|
|
|
mean value: 0.9682574295660861
|
|
|
|
MCC on Blind test: 0.19
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.71093893 0.7003665 0.6992557 0.70088744 0.69449234 0.69777536
|
|
0.69741082 0.70173669 0.71179318 0.71567655]
|
|
|
|
mean value: 0.7030333518981934
|
|
|
|
key: score_time
|
|
value: [0.00938368 0.00924563 0.00912905 0.00924134 0.00927377 0.00905395
|
|
0.00909781 0.00927424 0.01067686 0.01005054]
|
|
|
|
mean value: 0.009442687034606934
|
|
|
|
key: test_mcc
|
|
value: [0.90900317 0.88034084 0.82507166 0.85238636 0.96875 0.87988269
|
|
1. 0.96871896 0.90873893 0.90873893]
|
|
|
|
mean value: 0.9101631548615549
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.95238095 0.93650794 0.9047619 0.92063492 0.98412698 0.93650794
|
|
1. 0.98412698 0.95238095 0.95238095]
|
|
|
|
mean value: 0.9523809523809523
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.95384615 0.93939394 0.91176471 0.92537313 0.98412698 0.94117647
|
|
1. 0.98461538 0.95522388 0.95522388]
|
|
|
|
mean value: 0.9550744533975438
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.91176471 0.88571429 0.83783784 0.86111111 0.96875 0.88888889
|
|
1. 0.96969697 0.91428571 0.91428571]
|
|
|
|
mean value: 0.9152335227702875
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.953125 0.9375 0.90625 0.921875 0.984375 0.93548387
|
|
1. 0.98387097 0.9516129 0.9516129 ]
|
|
|
|
mean value: 0.952570564516129
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.91176471 0.88571429 0.83783784 0.86111111 0.96875 0.88888889
|
|
1. 0.96969697 0.91428571 0.91428571]
|
|
|
|
mean value: 0.9152335227702875
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.4
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03732729 0.03496265 0.05464745 0.06459832 0.03326321 0.03167319
|
|
0.03241444 0.03187943 0.05406642 0.03228021]
|
|
|
|
mean value: 0.040711259841918944
|
|
|
|
key: score_time
|
|
value: [0.01258993 0.01269913 0.02292609 0.0152688 0.02376914 0.0172925
|
|
0.0151341 0.01527381 0.01266146 0.02108312]
|
|
|
|
mean value: 0.01686980724334717
|
|
|
|
key: test_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.34
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02436829 0.03973722 0.03939223 0.03955579 0.03411579 0.03975797
|
|
0.03953481 0.0474658 0.03978467 0.04177952]
|
|
|
|
mean value: 0.03854920864105225
|
|
|
|
key: score_time
|
|
value: [0.02713418 0.02409577 0.01889348 0.0190475 0.01884246 0.0188694
|
|
0.01882482 0.0188911 0.01892948 0.01888418]
|
|
|
|
mean value: 0.02024123668670654
|
|
|
|
key: test_mcc
|
|
value: [0.82507166 0.79833297 0.85238636 0.79833297 0.74772995 0.85168687
|
|
0.93832585 0.87988269 0.93832585 0.85168687]
|
|
|
|
mean value: 0.8481762042802568
|
|
|
|
key: train_mcc
|
|
value: [0.90887831 0.90564657 0.90887831 0.92517842 0.88703618 0.90567804
|
|
0.91214664 0.91214664 0.90245767 0.90890766]
|
|
|
|
mean value: 0.9076954429264016
|
|
|
|
key: test_accuracy
|
|
value: [0.9047619 0.88888889 0.92063492 0.88888889 0.87301587 0.92063492
|
|
0.96825397 0.93650794 0.96825397 0.92063492]
|
|
|
|
mean value: 0.919047619047619
|
|
|
|
key: train_accuracy
|
|
value: [0.95238095 0.95061728 0.95238095 0.96119929 0.94179894 0.95061728
|
|
0.95414462 0.95414462 0.94885362 0.95238095]
|
|
|
|
mean value: 0.9518518518518518
|
|
|
|
key: test_fscore
|
|
value: [0.91176471 0.89855072 0.92537313 0.89855072 0.875 0.92753623
|
|
0.96969697 0.94117647 0.96969697 0.92753623]
|
|
|
|
mean value: 0.9244882163236364
|
|
|
|
key: train_fscore
|
|
value: [0.95462185 0.95302013 0.95462185 0.96271186 0.94435076 0.95286195
|
|
0.95608108 0.95608108 0.9512605 0.9544688 ]
|
|
|
|
mean value: 0.9540079876891189
|
|
|
|
key: test_precision
|
|
value: [0.83783784 0.81578947 0.86111111 0.81578947 0.84848485 0.86486486
|
|
0.94117647 0.88888889 0.94117647 0.86486486]
|
|
|
|
mean value: 0.8679984304597308
|
|
|
|
key: train_precision
|
|
value: [0.91318328 0.91025641 0.91318328 0.92810458 0.90614887 0.90996785
|
|
0.91585761 0.91585761 0.90705128 0.91290323]
|
|
|
|
mean value: 0.913251397609214
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.90322581 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9903225806451613
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 0.98591549 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9985915492957746
|
|
|
|
key: test_roc_auc
|
|
value: [0.90625 0.890625 0.921875 0.890625 0.8734879 0.91935484
|
|
0.96774194 0.93548387 0.96774194 0.91935484]
|
|
|
|
mean value: 0.9192540322580646
|
|
|
|
key: train_roc_auc
|
|
value: [0.95229682 0.95053004 0.95229682 0.96113074 0.941721 0.95070423
|
|
0.95422535 0.95422535 0.94894366 0.95246479]
|
|
|
|
mean value: 0.9518538794605086
|
|
|
|
key: test_jcc
|
|
value: [0.83783784 0.81578947 0.86111111 0.81578947 0.77777778 0.86486486
|
|
0.94117647 0.88888889 0.94117647 0.86486486]
|
|
|
|
mean value: 0.8609277233890237
|
|
|
|
key: /home/tanu/git/LSHTM_analysis/scripts/ml/./katg_rt.py:195: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./katg_rt.py:198: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
train_jcc
|
|
value: [0.91318328 0.91025641 0.91318328 0.92810458 0.89456869 0.90996785
|
|
0.91585761 0.91585761 0.90705128 0.91290323]
|
|
|
|
mean value: 0.9120933798874071
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_ppi2_affinity', 'interface_dist',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=168)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.292521 0.35826349 0.29794788 0.29545045 0.29894447 0.29038882
|
|
0.2892828 0.31287646 0.34191179 0.28921151]
|
|
|
|
mean value: 0.30667986869812014
|
|
|
|
key: score_time
|
|
value: [0.01879334 0.0186286 0.0186801 0.01862216 0.0193069 0.01874804
|
|
0.01870394 0.01886249 0.01872683 0.01910758]
|
|
|
|
mean value: 0.018817996978759764
|
|
|
|
key: test_mcc
|
|
value: [0.82507166 0.79833297 0.79833297 0.79833297 0.74772995 0.85168687
|
|
0.93832585 0.87988269 0.93832585 0.85168687]
|
|
|
|
mean value: 0.8427708653814082
|
|
|
|
key: train_mcc
|
|
value: [0.90887831 0.88962366 0.88644555 0.92517842 0.88703618 0.90567804
|
|
0.91214664 0.91214664 0.90245767 0.90890766]
|
|
|
|
mean value: 0.903849876437126
|
|
|
|
key: test_accuracy
|
|
value: [0.9047619 0.88888889 0.88888889 0.88888889 0.87301587 0.92063492
|
|
0.96825397 0.93650794 0.96825397 0.92063492]
|
|
|
|
mean value: 0.9158730158730158
|
|
|
|
key: train_accuracy
|
|
value: [0.95238095 0.94179894 0.94003527 0.96119929 0.94179894 0.95061728
|
|
0.95414462 0.95414462 0.94885362 0.95238095]
|
|
|
|
mean value: 0.9497354497354498
|
|
|
|
key: test_fscore
|
|
value: [0.91176471 0.89855072 0.89855072 0.89855072 0.875 0.92753623
|
|
0.96969697 0.94117647 0.96969697 0.92753623]
|
|
|
|
mean value: 0.9218059753545687
|
|
|
|
key: train_fscore
|
|
value: [0.95462185 0.94509151 0.94352159 0.96271186 0.94435076 0.95286195
|
|
0.95608108 0.95608108 0.9512605 0.9544688 ]
|
|
|
|
mean value: 0.9521051002750985
|
|
|
|
key: test_precision
|
|
value: [0.83783784 0.81578947 0.81578947 0.81578947 0.84848485 0.86486486
|
|
0.94117647 0.88888889 0.94117647 0.86486486]
|
|
|
|
mean value: 0.8634662667170407
|
|
|
|
key: train_precision
|
|
value: [0.91318328 0.89589905 0.89308176 0.92810458 0.90614887 0.90996785
|
|
0.91585761 0.91585761 0.90705128 0.91290323]
|
|
|
|
mean value: 0.9098055100727014
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.90322581 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9903225806451613
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 0.98591549 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9985915492957746
|
|
|
|
key: test_roc_auc
|
|
value: [0.90625 0.890625 0.890625 0.890625 0.8734879 0.91935484
|
|
0.96774194 0.93548387 0.96774194 0.91935484]
|
|
|
|
mean value: 0.9161290322580645
|
|
|
|
key: train_roc_auc
|
|
value: [0.95229682 0.94169611 0.93992933 0.96113074 0.941721 0.95070423
|
|
0.95422535 0.95422535 0.94894366 0.95246479]
|
|
|
|
mean value: 0.9497337381177524
|
|
|
|
key: test_jcc
|
|
value: [0.83783784 0.81578947 0.81578947 0.81578947 0.77777778 0.86486486
|
|
0.94117647 0.88888889 0.94117647 0.86486486]
|
|
|
|
mean value: 0.8563955596463336
|
|
|
|
key: train_jcc
|
|
value: [0.91318328 0.89589905 0.89308176 0.92810458 0.89456869 0.90996785
|
|
0.91585761 0.91585761 0.90705128 0.91290323]
|
|
|
|
mean value: 0.9086474923508945
|
|
|
|
MCC on Blind test: 0.28
|
|
|
|
Accuracy on Blind test: 0.54
|