18829 lines
898 KiB
Text
18829 lines
898 KiB
Text
/home/tanu/git/LSHTM_analysis/scripts/ml/ml_data_orig.py:550: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
mask_check.sort_values(by = ['ligand_distance'], ascending = True, inplace = True)
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/xgboost/compat.py:36: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
|
|
from pandas import MultiIndex, Int64Index
|
|
1.22.4
|
|
1.4.1
|
|
|
|
aaindex_df contains non-numerical data
|
|
|
|
Total no. of non-numerial columns: 2
|
|
|
|
Selecting numerical data only
|
|
|
|
PASS: successfully selected numerical columns only for aaindex_df
|
|
|
|
Now checking for NA in the remaining aaindex_cols
|
|
|
|
Counting aaindex_df cols with NA
|
|
ncols with NA: 4 columns
|
|
Dropping these...
|
|
Original ncols: 127
|
|
|
|
Revised df ncols: 123
|
|
|
|
Checking NA in revised df...
|
|
|
|
PASS: cols with NA successfully dropped from aaindex_df
|
|
Proceeding with combining aa_df with other features_df
|
|
|
|
PASS: ncols match
|
|
Expected ncols: 123
|
|
Got: 123
|
|
|
|
Total no. of columns in clean aa_df: 123
|
|
|
|
Proceeding to merge, expected nrows in merged_df: 531
|
|
|
|
PASS: my_features_df and aa_df successfully combined
|
|
nrows: 531
|
|
ncols: 286
|
|
count of NULL values before imputation
|
|
|
|
or_mychisq 263
|
|
log10_or_mychisq 263
|
|
dtype: int64
|
|
count of NULL values AFTER imputation
|
|
|
|
mutationinformation 0
|
|
or_rawI 0
|
|
logorI 0
|
|
dtype: int64
|
|
|
|
PASS: OR values imputed, data ready for ML
|
|
|
|
Total no. of features for aaindex: 123
|
|
|
|
No. of numerical features: 167
|
|
No. of categorical features: 7
|
|
|
|
index: 0
|
|
ind: 1
|
|
|
|
Mask count check: True
|
|
|
|
index: 1
|
|
ind: 2
|
|
|
|
Mask count check: True
|
|
Original Data
|
|
Counter({0: 76, 1: 43}) Data dim: (119, 174)
|
|
|
|
-------------------------------------------------------------
|
|
Successfully split data: ORIGINAL training
|
|
actual values: training set
|
|
imputed values: blind test set
|
|
Train data size: (119, 174)
|
|
Test data size: (412, 174)
|
|
y_train numbers: Counter({0: 76, 1: 43})
|
|
y_train ratio: 1.7674418604651163
|
|
|
|
y_test_numbers: Counter({0: 409, 1: 3})
|
|
y_test ratio: 136.33333333333334
|
|
-------------------------------------------------------------
|
|
Simple Random OverSampling
|
|
Counter({0: 76, 1: 76})
|
|
(152, 174)
|
|
Simple Random UnderSampling
|
|
Counter({0: 43, 1: 43})
|
|
(86, 174)
|
|
Simple Combined Over and UnderSampling
|
|
Counter({0: 76, 1: 76})
|
|
(152, 174)
|
|
SMOTE_NC OverSampling
|
|
Counter({0: 76, 1: 76})
|
|
(152, 174)
|
|
|
|
#####################################################################
|
|
|
|
Running ML analysis: ORIGINAL
|
|
Gene name: gid
|
|
Drug name: streptomycin
|
|
|
|
Output directory: /home/tanu/git/Data/streptomycin/output/ml/tts_orig/
|
|
|
|
Sanity checks:
|
|
Total input features: 174
|
|
|
|
Training data size: (119, 174)
|
|
Test data size: (412, 174)
|
|
|
|
Target feature numbers (training data): Counter({0: 76, 1: 43})
|
|
Target features ratio (training data: 1.7674418604651163
|
|
|
|
Target feature numbers (test data): Counter({0: 409, 1: 3})
|
|
Target features ratio (test data): 136.33333333333334
|
|
|
|
#####################################################################
|
|
|
|
|
|
================================================================
|
|
|
|
Strucutral features (n): 35
|
|
These are:
|
|
Common stablity features: ['ligand_distance', 'ligand_affinity_change', 'duet_stability_change', 'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts', 'mcsm_na_affinity']
|
|
FoldX columns: ['electro_rr', 'electro_mm', 'electro_sm', 'electro_ss', 'disulfide_rr', 'disulfide_mm', 'disulfide_sm', 'disulfide_ss', 'hbonds_rr', 'hbonds_mm', 'hbonds_sm', 'hbonds_ss', 'partcov_rr', 'partcov_mm', 'partcov_sm', 'partcov_ss', 'vdwclashes_rr', 'vdwclashes_mm', 'vdwclashes_sm', 'vdwclashes_ss', 'volumetric_rr', 'volumetric_mm', 'volumetric_ss']
|
|
Other struc columns: ['rsa', 'kd_values', 'rd_values']
|
|
================================================================
|
|
|
|
AAindex features (n): 123
|
|
These are:
|
|
['ALTS910101', 'AZAE970101', 'AZAE970102', 'BASU010101', 'BENS940101', 'BENS940102', 'BENS940103', 'BENS940104', 'BETM990101', 'BLAJ010101', 'BONM030101', 'BONM030102', 'BONM030103', 'BONM030104', 'BONM030105', 'BONM030106', 'BRYS930101', 'CROG050101', 'CSEM940101', 'DAYM780301', 'DAYM780302', 'DOSZ010101', 'DOSZ010102', 'DOSZ010103', 'DOSZ010104', 'FEND850101', 'FITW660101', 'GEOD900101', 'GIAG010101', 'GONG920101', 'GRAR740104', 'HENS920101', 'HENS920102', 'HENS920103', 'HENS920104', 'JOHM930101', 'JOND920103', 'JOND940101', 'KANM000101', 'KAPO950101', 'KESO980101', 'KESO980102', 'KOLA920101', 'KOLA930101', 'KOSJ950100_RSA_SST', 'KOSJ950100_SST', 'KOSJ950110_RSA', 'KOSJ950115', 'LEVJ860101', 'LINK010101', 'LIWA970101', 'LUTR910101', 'LUTR910102', 'LUTR910103', 'LUTR910104', 'LUTR910105', 'LUTR910106', 'LUTR910107', 'LUTR910108', 'LUTR910109', 'MCLA710101', 'MCLA720101', 'MEHP950102', 'MICC010101', 'MIRL960101', 'MIYS850102', 'MIYS850103', 'MIYS930101', 'MIYS960101', 'MIYS960102', 'MIYS960103', 'MIYS990106', 'MIYS990107', 'MIYT790101', 'MOHR870101', 'MOOG990101', 'MUET010101', 'MUET020101', 'MUET020102', 'NAOD960101', 'NGPC000101', 'NIEK910101', 'NIEK910102', 'OGAK980101', 'OVEJ920100_RSA', 'OVEJ920101', 'OVEJ920102', 'OVEJ920103', 'PRLA000101', 'PRLA000102', 'QUIB020101', 'QU_C930101', 'QU_C930102', 'QU_C930103', 'RIER950101', 'RISJ880101', 'RUSR970101', 'RUSR970102', 'RUSR970103', 'SIMK990101', 'SIMK990102', 'SIMK990103', 'SIMK990104', 'SIMK990105', 'SKOJ000101', 'SKOJ000102', 'SKOJ970101', 'TANS760101', 'TANS760102', 'THOP960101', 'TOBD000101', 'TOBD000102', 'TUDE900101', 'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101', 'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106']
|
|
================================================================
|
|
|
|
Evolutionary features (n): 3
|
|
These are:
|
|
['consurf_score', 'snap2_score', 'provean_score']
|
|
================================================================
|
|
|
|
Genomic features (n): 6
|
|
These are:
|
|
['maf', 'logorI']
|
|
['lineage_proportion', 'dist_lineage_proportion', 'lineage_count_all', 'lineage_count_unique']
|
|
================================================================
|
|
|
|
Categorical features (n): 7
|
|
These are:
|
|
['ss_class', 'aa_prop_change', 'electrostatics_change', 'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site']
|
|
================================================================
|
|
|
|
|
|
Pass: No. of features match
|
|
|
|
#####################################################################
|
|
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03334117 0.02475882 0.02577257 0.02525377 0.02861619 0.02571344
|
|
0.02372122 0.02629876 0.03418589 0.02536702]
|
|
|
|
mean value: 0.027302885055541994
|
|
|
|
key: score_time
|
|
value: [0.01207638 0.01153398 0.01153016 0.0115757 0.01153898 0.011554
|
|
0.0115602 0.01156116 0.01151729 0.01161742]
|
|
|
|
mean value: 0.011606526374816895
|
|
|
|
key: test_mcc
|
|
value: [0.63245553 0.25 0. 0.63245553 0.47809144 0.25
|
|
0.07559289 0.68313005 0.65714286 0.21428571]
|
|
|
|
mean value: 0.38731540248954993
|
|
|
|
key: train_mcc
|
|
value: [0.83974018 0.90115642 0.90115642 0.85945065 0.87925006 0.85945065
|
|
0.81756015 0.85757194 0.91818969 0.87982296]
|
|
|
|
mean value: 0.8713349127582828
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.66666667 0.5 0.83333333 0.75 0.66666667
|
|
0.58333333 0.83333333 0.83333333 0.63636364]
|
|
|
|
mean value: 0.7136363636363636
|
|
|
|
key: train_accuracy
|
|
value: [0.92523364 0.95327103 0.95327103 0.93457944 0.94392523 0.93457944
|
|
0.91588785 0.93457944 0.96261682 0.94444444]
|
|
|
|
mean value: 0.940238836967809
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.5 0.4 0.66666667 0.66666667 0.5
|
|
0.28571429 0.75 0.8 0.5 ]
|
|
|
|
mean value: 0.5735714285714285
|
|
|
|
key: train_fscore
|
|
value: [0.88888889 0.93150685 0.93150685 0.90410959 0.91891892 0.90410959
|
|
0.86956522 0.90140845 0.94594595 0.91891892]
|
|
|
|
mean value: 0.911487921748053
|
|
|
|
key: test_precision
|
|
value: [1. 0.5 0.33333333 1. 0.6 0.5
|
|
0.5 1. 0.8 0.5 ]
|
|
|
|
mean value: 0.6733333333333333
|
|
|
|
key: train_precision
|
|
value: [0.96969697 1. 1. 0.97058824 0.97142857 0.97058824
|
|
0.96774194 0.96969697 0.97222222 0.97142857]
|
|
|
|
mean value: 0.976339171054541
|
|
|
|
key: test_recall
|
|
value: [0.5 0.5 0.5 0.5 0.75 0.5 0.2 0.6 0.8 0.5 ]
|
|
|
|
mean value: 0.535
|
|
|
|
key: train_recall
|
|
value: [0.82051282 0.87179487 0.87179487 0.84615385 0.87179487 0.84615385
|
|
0.78947368 0.84210526 0.92105263 0.87179487]
|
|
|
|
mean value: 0.8552631578947368
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.625 0.5 0.75 0.75 0.625
|
|
0.52857143 0.8 0.82857143 0.60714286]
|
|
|
|
mean value: 0.6764285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.90290347 0.93589744 0.93589744 0.91572398 0.92854449 0.91572398
|
|
0.88749047 0.91380625 0.95327994 0.92865106]
|
|
|
|
mean value: 0.9217918517521425
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.33333333 0.25 0.5 0.5 0.33333333
|
|
0.16666667 0.6 0.66666667 0.33333333]
|
|
|
|
mean value: 0.41833333333333333
|
|
|
|
key: train_jcc
|
|
value: [0.8 0.87179487 0.87179487 0.825 0.85 0.825
|
|
0.76923077 0.82051282 0.8974359 0.85 ]
|
|
|
|
mean value: 0.838076923076923
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.73
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.71644402 0.81135726 0.6770184 0.69487214 0.77071428 0.63311291
|
|
0.59070015 0.60373497 0.72378373 0.66717815]
|
|
|
|
mean value: 0.6888916015625
|
|
|
|
key: score_time
|
|
value: [0.01187897 0.01522493 0.01512241 0.01507711 0.0152173 0.01510835
|
|
0.01505136 0.01501942 0.01196957 0.01193142]
|
|
|
|
mean value: 0.014160084724426269
|
|
|
|
key: test_mcc
|
|
value: [0.63245553 0.40824829 0.11952286 0.63245553 0.11952286 0.70710678
|
|
0.50709255 0.29277002 0.50709255 0.21428571]
|
|
|
|
mean value: 0.41405526994291353
|
|
|
|
key: train_mcc
|
|
value: [0.73774797 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.72342894]
|
|
|
|
mean value: 0.9461176913949825
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.75 0.58333333 0.83333333 0.58333333 0.83333333
|
|
0.75 0.66666667 0.75 0.63636364]
|
|
|
|
mean value: 0.7219696969696969
|
|
|
|
key: train_accuracy
|
|
value: [0.87850467 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.87037037]
|
|
|
|
mean value: 0.9748875043267566
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.57142857 0.44444444 0.66666667 0.44444444 0.8
|
|
0.72727273 0.5 0.72727273 0.5 ]
|
|
|
|
mean value: 0.6048196248196248
|
|
|
|
key: train_fscore
|
|
value: [0.8115942 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.78787879]
|
|
|
|
mean value: 0.9599472990777339
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 0.4 1. 0.4 0.66666667
|
|
0.66666667 0.66666667 0.66666667 0.5 ]
|
|
|
|
mean value: 0.6633333333333333
|
|
|
|
key: train_precision
|
|
value: [0.93333333 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.96296296]
|
|
|
|
mean value: 0.9896296296296296
|
|
|
|
key: test_recall
|
|
value: [0.5 0.5 0.5 0.5 0.5 1. 0.8 0.4 0.8 0.5]
|
|
|
|
mean value: 0.6
|
|
|
|
key: train_recall
|
|
value: [0.71794872 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.66666667]
|
|
|
|
mean value: 0.9384615384615385
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.6875 0.5625 0.75 0.5625 0.875
|
|
0.75714286 0.62857143 0.75714286 0.60714286]
|
|
|
|
mean value: 0.69375
|
|
|
|
key: train_roc_auc
|
|
value: [0.84426848 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.82608696]
|
|
|
|
mean value: 0.9670355433143157
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.4 0.28571429 0.5 0.28571429 0.66666667
|
|
0.57142857 0.33333333 0.57142857 0.33333333]
|
|
|
|
mean value: 0.44476190476190475
|
|
|
|
key: train_jcc
|
|
value: [0.68292683 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.65 ]
|
|
|
|
mean value: 0.9332926829268293
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01205301 0.01168513 0.00886297 0.00880456 0.00827646 0.00836802
|
|
0.00829816 0.00849175 0.00859928 0.00846052]
|
|
|
|
mean value: 0.009189987182617187
|
|
|
|
key: score_time
|
|
value: [0.01187491 0.00991917 0.00889611 0.00851369 0.00854278 0.00852203
|
|
0.00846195 0.00852776 0.00884151 0.00848675]
|
|
|
|
mean value: 0.009058666229248048
|
|
|
|
key: test_mcc
|
|
value: [ 0.31622777 0. 0.5 0.125 0. 0.
|
|
-0.37142857 0.23904572 0.23904572 -0.38575837]
|
|
|
|
mean value: 0.06621322634167942
|
|
|
|
key: train_mcc
|
|
value: [0.26480296 0.37487168 0.27967347 0.22683426 0.33144796 0.39638569
|
|
0.40772448 0.3171935 0.30927906 0.36454849]
|
|
|
|
mean value: 0.3272761552505757
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.5 0.66666667 0.5 0.41666667 0.41666667
|
|
0.33333333 0.58333333 0.58333333 0.27272727]
|
|
|
|
mean value: 0.4772727272727273
|
|
|
|
key: train_accuracy
|
|
value: [0.57943925 0.62616822 0.57943925 0.54205607 0.61682243 0.6728972
|
|
0.68224299 0.64485981 0.58878505 0.62962963]
|
|
|
|
mean value: 0.6162339910003461
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.4 0.66666667 0.5 0.46153846 0.46153846
|
|
0.2 0.61538462 0.61538462 0.33333333]
|
|
|
|
mean value: 0.4825274725274725
|
|
|
|
key: train_fscore
|
|
value: [0.58715596 0.63636364 0.59459459 0.57391304 0.61682243 0.64646465
|
|
0.64583333 0.59574468 0.6 0.62962963]
|
|
|
|
mean value: 0.6126521957924459
|
|
|
|
key: test_precision
|
|
value: [0.4 0.33333333 0.5 0.375 0.33333333 0.33333333
|
|
0.2 0.5 0.5 0.25 ]
|
|
|
|
mean value: 0.3725
|
|
|
|
key: train_precision
|
|
value: [0.45714286 0.49295775 0.45833333 0.43421053 0.48529412 0.53333333
|
|
0.53448276 0.5 0.45833333 0.49275362]
|
|
|
|
mean value: 0.4846841629393674
|
|
|
|
key: test_recall
|
|
value: [1. 0.5 1. 0.75 0.75 0.75 0.2 0.8 0.8 0.5 ]
|
|
|
|
mean value: 0.705
|
|
|
|
key: train_recall
|
|
value: [0.82051282 0.8974359 0.84615385 0.84615385 0.84615385 0.82051282
|
|
0.81578947 0.73684211 0.86842105 0.87179487]
|
|
|
|
mean value: 0.8369770580296896
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.5 0.75 0.5625 0.5 0.5
|
|
0.31428571 0.61428571 0.61428571 0.32142857]
|
|
|
|
mean value: 0.5301785714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.63084465 0.68401207 0.63631222 0.60690045 0.66572398 0.70437406
|
|
0.71224256 0.6655225 0.65160183 0.68227425]
|
|
|
|
mean value: 0.6639808563805737
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.25 0.5 0.33333333 0.3 0.3
|
|
0.11111111 0.44444444 0.44444444 0.2 ]
|
|
|
|
mean value: 0.3283333333333333
|
|
|
|
key: train_jcc
|
|
value: [0.41558442 0.46666667 0.42307692 0.40243902 0.44594595 0.47761194
|
|
0.47692308 0.42424242 0.42857143 0.45945946]
|
|
|
|
mean value: 0.4420521305159092
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.35
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00877714 0.00849271 0.00963545 0.00846076 0.00849152 0.00848961
|
|
0.00851655 0.00852251 0.00855732 0.0085299 ]
|
|
|
|
mean value: 0.008647346496582031
|
|
|
|
key: score_time
|
|
value: [0.00853372 0.00901699 0.00869703 0.00842142 0.00851774 0.00851393
|
|
0.00854301 0.00848365 0.00848579 0.0085268 ]
|
|
|
|
mean value: 0.00857400894165039
|
|
|
|
key: test_mcc
|
|
value: [-0.31622777 0. -0.31622777 0.25 0.40824829 0.625
|
|
0.07559289 0.07559289 0.11952286 0.13363062]
|
|
|
|
mean value: 0.10551320295235296
|
|
|
|
key: train_mcc
|
|
value: [0.54302345 0.56482566 0.5756791 0.52383566 0.54368212 0.52105076
|
|
0.51620555 0.51327923 0.51494818 0.54491455]
|
|
|
|
mean value: 0.5361444261644434
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.58333333 0.5 0.66666667 0.75 0.83333333
|
|
0.58333333 0.58333333 0.58333333 0.63636364]
|
|
|
|
mean value: 0.621969696969697
|
|
|
|
key: train_accuracy
|
|
value: [0.79439252 0.80373832 0.80373832 0.78504673 0.79439252 0.78504673
|
|
0.78504673 0.78504673 0.78504673 0.7962963 ]
|
|
|
|
mean value: 0.79177916233991
|
|
|
|
key: test_fscore
|
|
value: [0. 0.28571429 0. 0.5 0.57142857 0.75
|
|
0.28571429 0.28571429 0.44444444 0.33333333]
|
|
|
|
mean value: 0.34563492063492063
|
|
|
|
key: train_fscore
|
|
value: [0.67647059 0.69565217 0.6557377 0.63492063 0.68571429 0.65671642
|
|
0.66666667 0.63492063 0.62295082 0.67647059]
|
|
|
|
mean value: 0.6606220515106465
|
|
|
|
key: test_precision
|
|
value: [0. 0.33333333 0. 0.5 0.66666667 0.75
|
|
0.5 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.425
|
|
|
|
key: train_precision
|
|
value: [0.79310345 0.8 0.90909091 0.83333333 0.77419355 0.78571429
|
|
0.74193548 0.8 0.82608696 0.79310345]
|
|
|
|
mean value: 0.8056561413470056
|
|
|
|
key: test_recall
|
|
value: [0. 0.25 0. 0.5 0.5 0.75 0.2 0.2 0.4 0.25]
|
|
|
|
mean value: 0.305
|
|
|
|
key: train_recall
|
|
value: [0.58974359 0.61538462 0.51282051 0.51282051 0.61538462 0.56410256
|
|
0.60526316 0.52631579 0.5 0.58974359]
|
|
|
|
mean value: 0.5631578947368421
|
|
|
|
key: test_roc_auc
|
|
value: [0.375 0.5 0.375 0.625 0.6875 0.8125
|
|
0.52857143 0.52857143 0.55714286 0.55357143]
|
|
|
|
mean value: 0.5542857142857143
|
|
|
|
key: train_roc_auc
|
|
value: [0.75075415 0.76357466 0.74170437 0.72699849 0.75622172 0.73793363
|
|
0.74466056 0.72692601 0.72101449 0.75139353]
|
|
|
|
mean value: 0.7421181630546956
|
|
|
|
key: test_jcc
|
|
value: [0. 0.16666667 0. 0.33333333 0.4 0.6
|
|
0.16666667 0.16666667 0.28571429 0.2 ]
|
|
|
|
mean value: 0.2319047619047619
|
|
|
|
key: train_jcc
|
|
value: [0.51111111 0.53333333 0.48780488 0.46511628 0.52173913 0.48888889
|
|
0.5 0.46511628 0.45238095 0.51111111]
|
|
|
|
mean value: 0.4936601963448495
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.01097941 0.01172185 0.00993443 0.0093863 0.00939178 0.00932884
|
|
0.00943375 0.00923657 0.00932479 0.00890183]
|
|
|
|
mean value: 0.009763956069946289
|
|
|
|
key: score_time
|
|
value: [0.0509901 0.0338583 0.01047421 0.00983119 0.01023149 0.01005054
|
|
0.01004434 0.01029134 0.01021481 0.01026177]
|
|
|
|
mean value: 0.016624808311462402
|
|
|
|
key: test_mcc
|
|
value: [ 0.63245553 0.42640143 0.25 -0.31622777 0.81649658 0.15811388
|
|
-0.09759001 0.11952286 0.68313005 -0.23904572]
|
|
|
|
mean value: 0.24332568454998843
|
|
|
|
key: train_mcc
|
|
value: [0.45416735 0.49964579 0.50186627 0.4993366 0.40732456 0.59640798
|
|
0.5405763 0.49050631 0.42002098 0.54491455]
|
|
|
|
mean value: 0.4954766696721971
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.75 0.66666667 0.5 0.91666667 0.66666667
|
|
0.5 0.58333333 0.83333333 0.54545455]
|
|
|
|
mean value: 0.6795454545454546
|
|
|
|
key: train_accuracy
|
|
value: [0.75700935 0.77570093 0.77570093 0.77570093 0.73831776 0.81308411
|
|
0.79439252 0.77570093 0.74766355 0.7962963 ]
|
|
|
|
mean value: 0.7749567324333679
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.4 0.5 0. 0.85714286 0.33333333
|
|
0.25 0.44444444 0.75 0. ]
|
|
|
|
mean value: 0.4201587301587301
|
|
|
|
key: train_fscore
|
|
value: [0.60606061 0.625 0.61290323 0.64705882 0.5483871 0.67741935
|
|
0.63333333 0.61290323 0.54237288 0.67647059]
|
|
|
|
mean value: 0.6181909135740384
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.5 0. 1. 0.5
|
|
0.33333333 0.5 1. 0. ]
|
|
|
|
mean value: 0.5833333333333334
|
|
|
|
key: train_precision
|
|
value: [0.74074074 0.8 0.82608696 0.75862069 0.73913043 0.91304348
|
|
0.86363636 0.79166667 0.76190476 0.79310345]
|
|
|
|
mean value: 0.7987933540444785
|
|
|
|
key: test_recall
|
|
value: [0.5 0.25 0.5 0. 0.75 0.25 0.2 0.4 0.6 0. ]
|
|
|
|
mean value: 0.345
|
|
|
|
key: train_recall
|
|
value: [0.51282051 0.51282051 0.48717949 0.56410256 0.43589744 0.53846154
|
|
0.5 0.5 0.42105263 0.58974359]
|
|
|
|
mean value: 0.5062078272604589
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.625 0.625 0.375 0.875 0.5625
|
|
0.45714286 0.55714286 0.8 0.42857143]
|
|
|
|
mean value: 0.6055357142857143
|
|
|
|
key: train_roc_auc
|
|
value: [0.70493967 0.71964555 0.71417798 0.73058069 0.67383107 0.75452489
|
|
0.72826087 0.71376812 0.67429443 0.75139353]
|
|
|
|
mean value: 0.7165416800411416
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.25 0.33333333 0. 0.75 0.2
|
|
0.14285714 0.28571429 0.6 0. ]
|
|
|
|
mean value: 0.3061904761904762
|
|
|
|
key: train_jcc
|
|
value: [0.43478261 0.45454545 0.44186047 0.47826087 0.37777778 0.51219512
|
|
0.46341463 0.44186047 0.37209302 0.51111111]
|
|
|
|
mean value: 0.4487901531281146
|
|
|
|
MCC on Blind test: 0.02
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/neural_network/_multilayer_perceptron.py:702: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
|
|
warnings.warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01083565 0.01061916 0.01054645 0.01052928 0.01054525 0.01045609
|
|
0.00951052 0.00935054 0.01064563 0.01066136]
|
|
|
|
mean value: 0.010369992256164551
|
|
|
|
key: score_time
|
|
value: [0.00955319 0.00956869 0.00950885 0.01027656 0.00945163 0.00895619
|
|
0.0088501 0.00963807 0.00961924 0.00976944]
|
|
|
|
mean value: 0.009519195556640625
|
|
|
|
key: test_mcc
|
|
value: [-0.21320072 0.42640143 0.40824829 0. 0.42640143 0.
|
|
0.35675303 0.52915026 0.47809144 0.41833001]
|
|
|
|
mean value: 0.28301751927507457
|
|
|
|
key: train_mcc
|
|
value: [0.61356082 0.59383173 0.61356082 0.59383173 0.65250309 0.6717753
|
|
0.70511024 0.6658757 0.62613533 0.6341119 ]
|
|
|
|
mean value: 0.637029666985776
|
|
|
|
key: test_accuracy
|
|
value: [0.58333333 0.75 0.75 0.66666667 0.75 0.66666667
|
|
0.66666667 0.75 0.75 0.72727273]
|
|
|
|
mean value: 0.706060606060606
|
|
|
|
key: train_accuracy
|
|
value: [0.81308411 0.80373832 0.81308411 0.80373832 0.8317757 0.8411215
|
|
0.85981308 0.8411215 0.82242991 0.82407407]
|
|
|
|
mean value: 0.8253980616130149
|
|
|
|
key: test_fscore
|
|
value: [0. 0.4 0.57142857 0. 0.4 0.
|
|
0.33333333 0.57142857 0.66666667 0.4 ]
|
|
|
|
mean value: 0.3342857142857143
|
|
|
|
key: train_fscore
|
|
value: [0.65517241 0.63157895 0.65517241 0.63157895 0.7 0.72131148
|
|
0.75409836 0.71186441 0.66666667 0.6779661 ]
|
|
|
|
mean value: 0.6805409733529866
|
|
|
|
key: test_precision
|
|
value: [0. 1. 0.66666667 0. 1. 0.
|
|
1. 1. 0.75 1. ]
|
|
|
|
mean value: 0.6416666666666666
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0. 0.25 0.5 0. 0.25 0. 0.2 0.4 0.6 0.25]
|
|
|
|
mean value: 0.245
|
|
|
|
key: train_recall
|
|
value: [0.48717949 0.46153846 0.48717949 0.46153846 0.53846154 0.56410256
|
|
0.60526316 0.55263158 0.5 0.51282051]
|
|
|
|
mean value: 0.5170715249662619
|
|
|
|
key: test_roc_auc
|
|
value: [0.4375 0.625 0.6875 0.5 0.625 0.5
|
|
0.6 0.7 0.72857143 0.625 ]
|
|
|
|
mean value: 0.6028571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [0.74358974 0.73076923 0.74358974 0.73076923 0.76923077 0.78205128
|
|
0.80263158 0.77631579 0.75 0.75641026]
|
|
|
|
mean value: 0.7585357624831309
|
|
|
|
key: test_jcc
|
|
value: [0. 0.25 0.4 0. 0.25 0. 0.2 0.4 0.5 0.25]
|
|
|
|
mean value: 0.225
|
|
|
|
key: train_jcc
|
|
value: [0.48717949 0.46153846 0.48717949 0.46153846 0.53846154 0.56410256
|
|
0.60526316 0.55263158 0.5 0.51282051]
|
|
|
|
mean value: 0.5170715249662619
|
|
|
|
MCC on Blind test: -0.03
|
|
|
|
Accuracy on Blind test: 0.9
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.56232119 0.63862157 0.60934663 0.55800104 0.53653932 0.71582627
|
|
0.5305922 0.58970189 0.68618298 0.7097888 ]
|
|
|
|
mean value: 0.6136921882629395
|
|
|
|
key: score_time
|
|
value: [0.01231527 0.01209044 0.01212478 0.01505733 0.01235318 0.01213241
|
|
0.01209545 0.0121336 0.01381922 0.01211047]
|
|
|
|
mean value: 0.012623214721679687
|
|
|
|
key: test_mcc
|
|
value: [0.625 0.25 0.25 0.625 0.25 0.25
|
|
0.07559289 0.65714286 0.65714286 0.21428571]
|
|
|
|
mean value: 0.3854164323173274
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.66666667 0.66666667 0.83333333 0.66666667 0.66666667
|
|
0.58333333 0.83333333 0.83333333 0.63636364]
|
|
|
|
mean value: 0.7219696969696969
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.5 0.5 0.75 0.5 0.5
|
|
0.28571429 0.8 0.8 0.5 ]
|
|
|
|
mean value: 0.5885714285714286
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.5 0.5 0.75 0.5 0.5 0.5 0.8 0.8 0.5 ]
|
|
|
|
mean value: 0.61
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.5 0.75 0.5 0.5 0.2 0.8 0.8 0.5 ]
|
|
|
|
mean value: 0.58
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.625 0.625 0.8125 0.625 0.625
|
|
0.52857143 0.82857143 0.82857143 0.60714286]
|
|
|
|
mean value: 0.6917857142857143
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.33333333 0.33333333 0.6 0.33333333 0.33333333
|
|
0.16666667 0.66666667 0.66666667 0.33333333]
|
|
|
|
mean value: 0.43666666666666665
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0142343 0.01151633 0.01047158 0.01146269 0.01170778 0.01135254
|
|
0.01145124 0.01137567 0.01155138 0.01109648]
|
|
|
|
mean value: 0.011621999740600585
|
|
|
|
key: score_time
|
|
value: [0.01149178 0.00891137 0.00910735 0.00927162 0.00931621 0.00920415
|
|
0.00913906 0.00935984 0.00922346 0.00918746]
|
|
|
|
mean value: 0.009421229362487793
|
|
|
|
key: test_mcc
|
|
value: [0.83666003 0.625 0.625 0.625 0.83666003 1.
|
|
0.47809144 1. 0.65714286 0.60714286]
|
|
|
|
mean value: 0.7290697211087622
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91666667 0.83333333 0.83333333 0.83333333 0.91666667 1.
|
|
0.75 1. 0.83333333 0.81818182]
|
|
|
|
mean value: 0.8734848484848485
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.75 0.75 0.75 0.88888889 1.
|
|
0.66666667 1. 0.8 0.75 ]
|
|
|
|
mean value: 0.8244444444444444
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8 0.75 0.75 0.75 0.8 1. 0.75 1. 0.8 0.75]
|
|
|
|
mean value: 0.8150000000000001
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.75 0.75 0.75 1. 1. 0.6 1. 0.8 0.75]
|
|
|
|
mean value: 0.84
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.8125 0.8125 0.8125 0.9375 1.
|
|
0.72857143 1. 0.82857143 0.80357143]
|
|
|
|
mean value: 0.8673214285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.6 0.6 0.6 0.8 1.
|
|
0.5 1. 0.66666667 0.6 ]
|
|
|
|
mean value: 0.7166666666666667
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08845139 0.08627129 0.08441663 0.08516455 0.08460331 0.08442616
|
|
0.08551168 0.08432364 0.084692 0.08504295]
|
|
|
|
mean value: 0.08529036045074463
|
|
|
|
key: score_time
|
|
value: [0.01813054 0.01712751 0.01685095 0.01689005 0.01693153 0.01691318
|
|
0.0168407 0.01707983 0.01705503 0.01776624]
|
|
|
|
mean value: 0.01715855598449707
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.25 0.25 0.63245553 0.15811388 0.40824829
|
|
0.29277002 0.68313005 0.83666003 0.21428571]
|
|
|
|
mean value: 0.4542160100202007
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91666667 0.66666667 0.66666667 0.83333333 0.66666667 0.75
|
|
0.66666667 0.83333333 0.91666667 0.63636364]
|
|
|
|
mean value: 0.7553030303030303
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.5 0.5 0.66666667 0.33333333 0.57142857
|
|
0.5 0.75 0.88888889 0.5 ]
|
|
|
|
mean value: 0.6067460317460317
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.5 0.5 1. 0.5 0.66666667
|
|
0.66666667 1. 1. 0.5 ]
|
|
|
|
mean value: 0.7333333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.5 0.5 0.25 0.5 0.4 0.6 0.8 0.5 ]
|
|
|
|
mean value: 0.53
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.625 0.625 0.75 0.5625 0.6875
|
|
0.62857143 0.8 0.9 0.60714286]
|
|
|
|
mean value: 0.7060714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.33333333 0.33333333 0.5 0.2 0.4
|
|
0.33333333 0.6 0.8 0.33333333]
|
|
|
|
mean value: 0.4583333333333333
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.02
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00952244 0.00952935 0.00941849 0.00951624 0.00955868 0.00941634
|
|
0.00942731 0.0094018 0.00958323 0.00910163]
|
|
|
|
mean value: 0.009447550773620606
|
|
|
|
key: score_time
|
|
value: [0.00921059 0.00916076 0.00918984 0.00924277 0.00925565 0.00907969
|
|
0.00916648 0.00929952 0.00872755 0.0085113 ]
|
|
|
|
mean value: 0.009084415435791016
|
|
|
|
key: test_mcc
|
|
value: [ 0.125 0.42640143 0.40824829 0.40824829 0.625 0.23904572
|
|
0.31428571 -0.02857143 0.50709255 0.38575837]
|
|
|
|
mean value: 0.3410508948962451
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.75 0.75 0.75 0.83333333 0.58333333
|
|
0.66666667 0.5 0.75 0.72727273]
|
|
|
|
mean value: 0.681060606060606
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.4 0.57142857 0.57142857 0.75 0.54545455
|
|
0.6 0.4 0.72727273 0.57142857]
|
|
|
|
mean value: 0.5637012987012987
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.375 1. 0.66666667 0.66666667 0.75 0.42857143
|
|
0.6 0.4 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6220238095238095
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.25 0.5 0.5 0.75 0.75 0.6 0.4 0.8 0.5 ]
|
|
|
|
mean value: 0.58
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5625 0.625 0.6875 0.6875 0.8125 0.625
|
|
0.65714286 0.48571429 0.75714286 0.67857143]
|
|
|
|
mean value: 0.6578571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.25 0.4 0.4 0.6 0.375
|
|
0.42857143 0.25 0.57142857 0.4 ]
|
|
|
|
mean value: 0.4008333333333333
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.11022449 1.10686707 1.07394433 1.17411065 1.12241173 1.1280334
|
|
1.07594752 1.093925 1.09230089 1.07993698]
|
|
|
|
mean value: 1.105770206451416
|
|
|
|
key: score_time
|
|
value: [0.09314108 0.09392309 0.08695936 0.09498835 0.09460545 0.08653736
|
|
0.0941546 0.08683944 0.09424591 0.08606005]
|
|
|
|
mean value: 0.09114546775817871
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.40824829 0.625 0.63245553 0.40824829 0.63245553
|
|
0.29277002 0.68313005 0.83666003 0.13363062]
|
|
|
|
mean value: 0.5469094946361625
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.91666667 0.75 0.83333333 0.83333333 0.75 0.83333333
|
|
0.66666667 0.83333333 0.91666667 0.63636364]
|
|
|
|
mean value: 0.796969696969697
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.57142857 0.75 0.66666667 0.57142857 0.66666667
|
|
0.5 0.75 0.88888889 0.33333333]
|
|
|
|
mean value: 0.6555555555555556
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 0.75 1. 0.66666667 1.
|
|
0.66666667 1. 1. 0.5 ]
|
|
|
|
mean value: 0.825
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.75 0.5 0.5 0.5 0.4 0.6 0.8 0.25]
|
|
|
|
mean value: 0.555
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.6875 0.8125 0.75 0.6875 0.75
|
|
0.62857143 0.8 0.9 0.55357143]
|
|
|
|
mean value: 0.7444642857142857
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.4 0.6 0.5 0.4 0.5
|
|
0.33333333 0.6 0.8 0.2 ]
|
|
|
|
mean value: 0.5083333333333333
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
key: fit_time
|
|
value: [1.72028232 0.85728979 0.8835597 0.87653446 0.84350824 0.84826064
|
|
0.85741544 0.89759564 0.87344646 0.88773561]
|
|
|
|
mean value: 0.9545628309249878
|
|
|
|
key: score_time
|
|
value: [0.16366887 0.22488356 0.20972967 0.29697323 0.24213696 0.18953753
|
|
0.17323303 0.19940066 0.21370244 0.14987516]
|
|
|
|
mean value: 0.2063141107559204
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.40824829 0.81649658 0.63245553 0.15811388 0.63245553
|
|
0.52915026 0.52915026 0.83666003 0.41833001]
|
|
|
|
mean value: 0.5777556963622036
|
|
|
|
key: train_mcc
|
|
value: [0.90115642 0.92064018 0.94025192 0.94025192 0.88178613 0.88178613
|
|
0.89985774 0.88019137 0.91962501 0.92101104]
|
|
|
|
mean value: 0.9086557859411416
|
|
|
|
key: test_accuracy
|
|
value: [0.91666667 0.75 0.91666667 0.83333333 0.66666667 0.83333333
|
|
0.75 0.75 0.91666667 0.72727273]
|
|
|
|
mean value: 0.806060606060606
|
|
|
|
key: train_accuracy
|
|
value: [0.95327103 0.96261682 0.97196262 0.97196262 0.94392523 0.94392523
|
|
0.95327103 0.94392523 0.96261682 0.96296296]
|
|
|
|
mean value: 0.9570439598476982
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.57142857 0.85714286 0.66666667 0.33333333 0.66666667
|
|
0.57142857 0.57142857 0.88888889 0.4 ]
|
|
|
|
mean value: 0.6384126984126984
|
|
|
|
key: train_fscore
|
|
value: [0.93150685 0.94594595 0.96 0.96 0.91666667 0.91666667
|
|
0.92957746 0.91428571 0.94444444 0.94594595]
|
|
|
|
mean value: 0.9365039698059185
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 1. 1. 0.5 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9166666666666666
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.75 0.5 0.25 0.5 0.4 0.4 0.8 0.25]
|
|
|
|
mean value: 0.51
|
|
|
|
key: train_recall
|
|
value: [0.87179487 0.8974359 0.92307692 0.92307692 0.84615385 0.84615385
|
|
0.86842105 0.84210526 0.89473684 0.8974359 ]
|
|
|
|
mean value: 0.8810391363022942
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.6875 0.875 0.75 0.5625 0.75 0.7 0.7 0.9 0.625 ]
|
|
|
|
mean value: 0.7424999999999999
|
|
|
|
key: train_roc_auc
|
|
value: [0.93589744 0.94871795 0.96153846 0.96153846 0.92307692 0.92307692
|
|
0.93421053 0.92105263 0.94736842 0.94871795]
|
|
|
|
mean value: 0.9405195681511471
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.4 0.75 0.5 0.2 0.5 0.4 0.4 0.8 0.25]
|
|
|
|
mean value: 0.495
|
|
|
|
key: train_jcc
|
|
value: [0.87179487 0.8974359 0.92307692 0.92307692 0.84615385 0.84615385
|
|
0.86842105 0.84210526 0.89473684 0.8974359 ]
|
|
|
|
mean value: 0.8810391363022942
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02321768 0.00922227 0.00949192 0.00946665 0.00959659 0.00948167
|
|
0.00957298 0.00974035 0.00960398 0.00952101]
|
|
|
|
mean value: 0.01089150905609131
|
|
|
|
key: score_time
|
|
value: [0.01008844 0.00922775 0.00936937 0.00923586 0.00924206 0.00932956
|
|
0.00904942 0.00939536 0.00939751 0.00934649]
|
|
|
|
mean value: 0.009368181228637695
|
|
|
|
key: test_mcc
|
|
value: [-0.31622777 0. -0.31622777 0.25 0.40824829 0.625
|
|
0.07559289 0.07559289 0.11952286 0.13363062]
|
|
|
|
mean value: 0.10551320295235296
|
|
|
|
key: train_mcc
|
|
value: [0.54302345 0.56482566 0.5756791 0.52383566 0.54368212 0.52105076
|
|
0.51620555 0.51327923 0.51494818 0.54491455]
|
|
|
|
mean value: 0.5361444261644434
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.58333333 0.5 0.66666667 0.75 0.83333333
|
|
0.58333333 0.58333333 0.58333333 0.63636364]
|
|
|
|
mean value: 0.621969696969697
|
|
|
|
key: train_accuracy
|
|
value: [0.79439252 0.80373832 0.80373832 0.78504673 0.79439252 0.78504673
|
|
0.78504673 0.78504673 0.78504673 0.7962963 ]
|
|
|
|
mean value: 0.79177916233991
|
|
|
|
key: test_fscore
|
|
value: [0. 0.28571429 0. 0.5 0.57142857 0.75
|
|
0.28571429 0.28571429 0.44444444 0.33333333]
|
|
|
|
mean value: 0.34563492063492063
|
|
|
|
key: train_fscore
|
|
value: [0.67647059 0.69565217 0.6557377 0.63492063 0.68571429 0.65671642
|
|
0.66666667 0.63492063 0.62295082 0.67647059]
|
|
|
|
mean value: 0.6606220515106465
|
|
|
|
key: test_precision
|
|
value: [0. 0.33333333 0. 0.5 0.66666667 0.75
|
|
0.5 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.425
|
|
|
|
key: train_precision
|
|
value: [0.79310345 0.8 0.90909091 0.83333333 0.77419355 0.78571429
|
|
0.74193548 0.8 0.82608696 0.79310345]
|
|
|
|
mean value: 0.8056561413470056
|
|
|
|
key: test_recall
|
|
value: [0. 0.25 0. 0.5 0.5 0.75 0.2 0.2 0.4 0.25]
|
|
|
|
mean value: 0.305
|
|
|
|
key: train_recall
|
|
value: [0.58974359 0.61538462 0.51282051 0.51282051 0.61538462 0.56410256
|
|
0.60526316 0.52631579 0.5 0.58974359]
|
|
|
|
mean value: 0.5631578947368421
|
|
|
|
key: test_roc_auc
|
|
value: [0.375 0.5 0.375 0.625 0.6875 0.8125
|
|
0.52857143 0.52857143 0.55714286 0.55357143]
|
|
|
|
mean value: 0.5542857142857143
|
|
|
|
key: train_roc_auc
|
|
value: [0.75075415 0.76357466 0.74170437 0.72699849 0.75622172 0.73793363
|
|
0.74466056 0.72692601 0.72101449 0.75139353]
|
|
|
|
mean value: 0.7421181630546956
|
|
|
|
key: test_jcc
|
|
value: [0. 0.16666667 0. 0.33333333 0.4 0.6
|
|
0.16666667 0.16666667 0.28571429 0.2 ]
|
|
|
|
mean value: 0.2319047619047619
|
|
|
|
key: train_jcc
|
|
value: [0.51111111 0.53333333 0.48780488 0.46511628 0.52173913 0.48888889
|
|
0.5 0.46511628 0.45238095 0.51111111]
|
|
|
|
mean value: 0.4936601963448495
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.15230823 0.0347209 0.03545165 0.0362525 0.03555393 0.0401206
|
|
0.03871918 0.04143095 0.0373764 0.04362845]
|
|
|
|
mean value: 0.04955627918243408
|
|
|
|
key: score_time
|
|
value: [0.01079464 0.01076293 0.01020479 0.01042175 0.0106473 0.01025009
|
|
0.01163721 0.01053095 0.01053333 0.01037621]
|
|
|
|
mean value: 0.010615921020507813
|
|
|
|
key: test_mcc
|
|
value: [1. 0.625 1. 1. 0.83666003 1.
|
|
0.65714286 1. 0.65714286 0.81009259]
|
|
|
|
mean value: 0.8586038328120772
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.83333333 1. 1. 0.91666667 1.
|
|
0.83333333 1. 0.83333333 0.90909091]
|
|
|
|
mean value: 0.9325757575757576
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.75 1. 1. 0.88888889 1.
|
|
0.8 1. 0.8 0.85714286]
|
|
|
|
mean value: 0.9096031746031746
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.75 1. 1. 0.8 1. 0.8 1. 0.8 1. ]
|
|
|
|
mean value: 0.915
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.75 1. 1. 1. 1. 0.8 1. 0.8 0.75]
|
|
|
|
mean value: 0.91
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.8125 1. 1. 0.9375 1.
|
|
0.82857143 1. 0.82857143 0.875 ]
|
|
|
|
mean value: 0.9282142857142858
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.6 1. 1. 0.8 1.
|
|
0.66666667 1. 0.66666667 0.75 ]
|
|
|
|
mean value: 0.8483333333333334
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.03027558 0.01925349 0.04498887 0.04488349 0.0192492 0.02261639
|
|
0.04425192 0.04414797 0.04453683 0.04522443]
|
|
|
|
mean value: 0.035942816734313966
|
|
|
|
key: score_time
|
|
value: [0.01195264 0.01174021 0.02140355 0.01278901 0.01185536 0.0207603
|
|
0.02176905 0.02263951 0.02292538 0.02251673]
|
|
|
|
mean value: 0.018035173416137695
|
|
|
|
key: test_mcc
|
|
value: [ 0.125 0.25 0.11952286 -0.23904572 0.5 0.
|
|
0.29277002 0.02857143 -0.29277002 -0.06900656]
|
|
|
|
mean value: 0.07150420117037538
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.66666667 0.58333333 0.41666667 0.66666667 0.5
|
|
0.66666667 0.5 0.33333333 0.45454545]
|
|
|
|
mean value: 0.5287878787878788
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.5 0.44444444 0.22222222 0.66666667 0.4
|
|
0.5 0.5 0.42857143 0.4 ]
|
|
|
|
mean value: 0.4561904761904762
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.375 0.5 0.4 0.2 0.5 0.33333333
|
|
0.66666667 0.42857143 0.33333333 0.33333333]
|
|
|
|
mean value: 0.4070238095238095
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.5 0.25 1. 0.5 0.4 0.6 0.6 0.5 ]
|
|
|
|
mean value: 0.56
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5625 0.625 0.5625 0.375 0.75 0.5
|
|
0.62857143 0.51428571 0.37142857 0.46428571]
|
|
|
|
mean value: 0.5353571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.33333333 0.28571429 0.125 0.5 0.25
|
|
0.33333333 0.33333333 0.27272727 0.25 ]
|
|
|
|
mean value: 0.30167748917748916
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.57
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02219272 0.00896144 0.00897241 0.00866389 0.00925398 0.00860715
|
|
0.00947046 0.00968862 0.00901008 0.00871587]
|
|
|
|
mean value: 0.010353660583496094
|
|
|
|
key: score_time
|
|
value: [0.02152371 0.00910091 0.00912261 0.00854945 0.00911403 0.00849843
|
|
0.00861621 0.00916672 0.00870585 0.00874329]
|
|
|
|
mean value: 0.010114121437072753
|
|
|
|
key: test_mcc
|
|
value: [ 0.625 0.40824829 0.11952286 0.81649658 0. 0.
|
|
-0.09759001 0.23904572 0.31428571 0.21428571]
|
|
|
|
mean value: 0.26392948754684825
|
|
|
|
key: train_mcc
|
|
value: [0.30586639 0.30789368 0.42925551 0.36941929 0.3617205 0.43262802
|
|
0.43009525 0.34175868 0.37917626 0.37501394]
|
|
|
|
mean value: 0.37328275032791647
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.75 0.58333333 0.91666667 0.58333333 0.41666667
|
|
0.5 0.58333333 0.66666667 0.63636364]
|
|
|
|
mean value: 0.646969696969697
|
|
|
|
key: train_accuracy
|
|
value: [0.6728972 0.6635514 0.73831776 0.70093458 0.70093458 0.72897196
|
|
0.71962617 0.68224299 0.71028037 0.69444444]
|
|
|
|
mean value: 0.7012201453790239
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.57142857 0.44444444 0.85714286 0.28571429 0.46153846
|
|
0.25 0.61538462 0.6 0.5 ]
|
|
|
|
mean value: 0.5335653235653236
|
|
|
|
key: train_fscore
|
|
value: [0.56790123 0.58139535 0.63157895 0.6097561 0.6 0.65060241
|
|
0.65116279 0.5952381 0.60759494 0.62068966]
|
|
|
|
mean value: 0.6115919515790106
|
|
|
|
key: test_precision
|
|
value: [0.75 0.66666667 0.4 1. 0.33333333 0.33333333
|
|
0.33333333 0.5 0.6 0.5 ]
|
|
|
|
mean value: 0.5416666666666666
|
|
|
|
key: train_precision
|
|
value: [0.54761905 0.53191489 0.64864865 0.58139535 0.58536585 0.61363636
|
|
0.58333333 0.54347826 0.58536585 0.5625 ]
|
|
|
|
mean value: 0.5783257603878262
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.5 0.75 0.25 0.75 0.2 0.8 0.6 0.5 ]
|
|
|
|
mean value: 0.56
|
|
|
|
key: train_recall
|
|
value: [0.58974359 0.64102564 0.61538462 0.64102564 0.61538462 0.69230769
|
|
0.73684211 0.65789474 0.63157895 0.69230769]
|
|
|
|
mean value: 0.6513495276653172
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.6875 0.5625 0.875 0.5 0.5
|
|
0.45714286 0.61428571 0.65714286 0.60714286]
|
|
|
|
mean value: 0.6273214285714286
|
|
|
|
key: train_roc_auc
|
|
value: [0.65516591 0.65874811 0.71210407 0.68815988 0.68269231 0.72115385
|
|
0.72349352 0.67677346 0.69260107 0.69397993]
|
|
|
|
mean value: 0.6904872105504761
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.4 0.28571429 0.75 0.16666667 0.3
|
|
0.14285714 0.44444444 0.42857143 0.33333333]
|
|
|
|
mean value: 0.38515873015873014
|
|
|
|
key: train_jcc
|
|
value: [0.39655172 0.40983607 0.46153846 0.43859649 0.42857143 0.48214286
|
|
0.48275862 0.42372881 0.43636364 0.45 ]
|
|
|
|
mean value: 0.4410088098805133
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01069689 0.01261544 0.01330781 0.01351762 0.01318288 0.0139327
|
|
0.01301622 0.01384878 0.01437545 0.01323342]
|
|
|
|
mean value: 0.013172721862792969
|
|
|
|
key: score_time
|
|
value: [0.00870061 0.01082563 0.01075101 0.0115633 0.01132607 0.01154351
|
|
0.01151013 0.01174736 0.011554 0.01127577]
|
|
|
|
mean value: 0.011079740524291993
|
|
|
|
key: test_mcc
|
|
value: [0.81649658 0.40824829 0.11952286 0.81649658 0.47809144 0.5976143
|
|
0.09759001 0.50709255 0.65714286 0.41833001]
|
|
|
|
mean value: 0.4916625492195567
|
|
|
|
key: train_mcc
|
|
value: [0.8843113 0.82420912 0.9600061 0.90115642 0.94025192 0.77885663
|
|
0.70820392 0.88799637 0.97968078 0.8996469 ]
|
|
|
|
mean value: 0.8764319472697766
|
|
|
|
key: test_accuracy
|
|
value: [0.91666667 0.75 0.58333333 0.91666667 0.75 0.75
|
|
0.5 0.75 0.83333333 0.72727273]
|
|
|
|
mean value: 0.7477272727272727
|
|
|
|
key: train_accuracy
|
|
value: [0.94392523 0.91588785 0.98130841 0.95327103 0.97196262 0.87850467
|
|
0.8317757 0.94392523 0.99065421 0.9537037 ]
|
|
|
|
mean value: 0.9364918656974732
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.57142857 0.44444444 0.85714286 0.66666667 0.72727273
|
|
0.57142857 0.72727273 0.8 0.4 ]
|
|
|
|
mean value: 0.6622799422799422
|
|
|
|
key: train_fscore
|
|
value: [0.92682927 0.86956522 0.97368421 0.93150685 0.96 0.85714286
|
|
0.80851064 0.92682927 0.98666667 0.93333333]
|
|
|
|
mean value: 0.9174068309258784
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 0.4 1. 0.6 0.57142857
|
|
0.44444444 0.66666667 0.8 1. ]
|
|
|
|
mean value: 0.714920634920635
|
|
|
|
key: train_precision
|
|
value: [0.88372093 1. 1. 1. 1. 0.75
|
|
0.67857143 0.86363636 1. 0.97222222]
|
|
|
|
mean value: 0.9148150944662573
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.5 0.75 0.75 1. 0.8 0.8 0.8 0.25]
|
|
|
|
mean value: 0.6900000000000001
|
|
|
|
key: train_recall
|
|
value: [0.97435897 0.76923077 0.94871795 0.87179487 0.92307692 1.
|
|
1. 1. 0.97368421 0.8974359 ]
|
|
|
|
mean value: 0.93582995951417
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.6875 0.5625 0.875 0.75 0.8125
|
|
0.54285714 0.75714286 0.82857143 0.625 ]
|
|
|
|
mean value: 0.7316071428571429
|
|
|
|
key: train_roc_auc
|
|
value: [0.95041478 0.88461538 0.97435897 0.93589744 0.96153846 0.90441176
|
|
0.86956522 0.95652174 0.98684211 0.94147157]
|
|
|
|
mean value: 0.9365637436104525
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.4 0.28571429 0.75 0.5 0.57142857
|
|
0.4 0.57142857 0.66666667 0.25 ]
|
|
|
|
mean value: 0.5145238095238095
|
|
|
|
key: train_jcc
|
|
value: [0.86363636 0.76923077 0.94871795 0.87179487 0.92307692 0.75
|
|
0.67857143 0.86363636 0.97368421 0.875 ]
|
|
|
|
mean value: 0.8517348879190985
|
|
|
|
MCC on Blind test: 0.14
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01269102 0.01227665 0.01251817 0.01275468 0.01322985 0.01274872
|
|
0.01255131 0.01216674 0.01289606 0.01244688]
|
|
|
|
mean value: 0.012628006935119628
|
|
|
|
key: score_time
|
|
value: [0.01155043 0.01127887 0.01133299 0.01129389 0.01123714 0.01129723
|
|
0.01133728 0.01136446 0.01130033 0.01130509]
|
|
|
|
mean value: 0.0113297700881958
|
|
|
|
key: test_mcc
|
|
value: [ 0.81649658 0.25 0.25 0.42640143 0.625 -0.21320072
|
|
0.07559289 0.68313005 0.65714286 0.44854261]
|
|
|
|
mean value: 0.40191057136645425
|
|
|
|
key: train_mcc
|
|
value: [0.87895928 0.73075324 0.97991726 0.59383173 0.98013354 0.26021572
|
|
0.91962501 0.73787244 0.88799637 0.88955289]
|
|
|
|
mean value: 0.7858857488632192
|
|
|
|
key: test_accuracy
|
|
value: [0.91666667 0.66666667 0.66666667 0.75 0.83333333 0.58333333
|
|
0.58333333 0.83333333 0.83333333 0.72727273]
|
|
|
|
mean value: 0.7393939393939394
|
|
|
|
key: train_accuracy
|
|
value: [0.94392523 0.85981308 0.99065421 0.80373832 0.99065421 0.6728972
|
|
0.96261682 0.87850467 0.94392523 0.94444444]
|
|
|
|
mean value: 0.8991173416407061
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.5 0.5 0.4 0.75 0.
|
|
0.28571429 0.75 0.8 0.66666667]
|
|
|
|
mean value: 0.550952380952381
|
|
|
|
key: train_fscore
|
|
value: [0.92307692 0.83146067 0.98701299 0.63157895 0.98734177 0.18604651
|
|
0.94444444 0.8 0.92682927 0.92857143]
|
|
|
|
mean value: 0.8146362956703995
|
|
|
|
key: test_precision
|
|
value: [1. 0.5 0.5 1. 0.75 0. 0.5 1. 0.8 0.6 ]
|
|
|
|
mean value: 0.665
|
|
|
|
key: train_precision
|
|
value: [0.92307692 0.74 1. 1. 0.975 1.
|
|
1. 0.96296296 0.86363636 0.86666667]
|
|
|
|
mean value: 0.9331342916342916
|
|
|
|
key: test_recall
|
|
value: [0.75 0.5 0.5 0.25 0.75 0. 0.2 0.6 0.8 0.75]
|
|
|
|
mean value: 0.51
|
|
|
|
key: train_recall
|
|
value: [0.92307692 0.94871795 0.97435897 0.46153846 1. 0.1025641
|
|
0.89473684 0.68421053 1. 1. ]
|
|
|
|
mean value: 0.7989203778677463
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.625 0.625 0.625 0.8125 0.4375
|
|
0.52857143 0.8 0.82857143 0.73214286]
|
|
|
|
mean value: 0.6889285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.93947964 0.87877074 0.98717949 0.73076923 0.99264706 0.55128205
|
|
0.94736842 0.83485889 0.95652174 0.95652174]
|
|
|
|
mean value: 0.8775398990788007
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.33333333 0.33333333 0.25 0.6 0.
|
|
0.16666667 0.6 0.66666667 0.5 ]
|
|
|
|
mean value: 0.42
|
|
|
|
key: train_jcc
|
|
value: [0.85714286 0.71153846 0.97435897 0.46153846 0.975 0.1025641
|
|
0.89473684 0.66666667 0.86363636 0.86666667]
|
|
|
|
mean value: 0.7373849396217818
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.82
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.10345721 0.08764267 0.08817434 0.08749247 0.08806634 0.08874726
|
|
0.08956051 0.08980393 0.08894587 0.0904038 ]
|
|
|
|
mean value: 0.0902294397354126
|
|
|
|
key: score_time
|
|
value: [0.01449013 0.01465034 0.01477146 0.01476502 0.01470923 0.01490712
|
|
0.01549172 0.01508617 0.01505709 0.01503992]
|
|
|
|
mean value: 0.014896821975708009
|
|
|
|
key: test_mcc
|
|
value: [1. 0.625 0.625 0.63245553 0.83666003 0.83666003
|
|
0.50709255 1. 0.83666003 0.81009259]
|
|
|
|
mean value: 0.7709620751773996
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.83333333 0.83333333 0.83333333 0.91666667 0.91666667
|
|
0.75 1. 0.91666667 0.90909091]
|
|
|
|
mean value: 0.8909090909090909
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.75 0.75 0.66666667 0.88888889 0.88888889
|
|
0.72727273 1. 0.88888889 0.85714286]
|
|
|
|
mean value: 0.8417748917748917
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.75 0.75 1. 0.8 0.8
|
|
0.66666667 1. 1. 1. ]
|
|
|
|
mean value: 0.8766666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.75 0.75 0.5 1. 1. 0.8 1. 0.8 0.75]
|
|
|
|
mean value: 0.835
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.8125 0.8125 0.75 0.9375 0.9375
|
|
0.75714286 1. 0.9 0.875 ]
|
|
|
|
mean value: 0.8782142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.6 0.6 0.5 0.8 0.8
|
|
0.57142857 1. 0.8 0.75 ]
|
|
|
|
mean value: 0.7421428571428571
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.09
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03509355 0.03801394 0.04573321 0.04925704 0.04040885 0.02893972
|
|
0.03013682 0.03751421 0.04837871 0.04894233]
|
|
|
|
mean value: 0.04024183750152588
|
|
|
|
key: score_time
|
|
value: [0.02462077 0.03305888 0.04407382 0.03202057 0.02337003 0.02233243
|
|
0.02474475 0.03469205 0.02849483 0.033566 ]
|
|
|
|
mean value: 0.030097413063049316
|
|
|
|
key: test_mcc
|
|
value: [1. 0.625 0.81649658 1. 0.83666003 1.
|
|
0.83666003 1. 0.65714286 0.81009259]
|
|
|
|
mean value: 0.8582052078439717
|
|
|
|
key: train_mcc
|
|
value: [1. 0.97991726 1. 0.95965309 0.97991726 1.
|
|
0.97991726 1. 1. 0.95986622]
|
|
|
|
mean value: 0.9859271082573318
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.83333333 0.91666667 1. 0.91666667 1.
|
|
0.91666667 1. 0.83333333 0.90909091]
|
|
|
|
mean value: 0.9325757575757576
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.99065421 1. 0.98130841 0.99065421 1.
|
|
0.99065421 1. 1. 0.98148148]
|
|
|
|
mean value: 0.9934752509518865
|
|
|
|
key: test_fscore
|
|
value: [1. 0.75 0.85714286 1. 0.88888889 1.
|
|
0.88888889 1. 0.8 0.85714286]
|
|
|
|
mean value: 0.9042063492063492
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98701299 1. 0.97435897 0.98701299 1.
|
|
0.98701299 1. 1. 0.97435897]
|
|
|
|
mean value: 0.9909756909756909
|
|
|
|
key: test_precision
|
|
value: [1. 0.75 1. 1. 0.8 1. 1. 1. 0.8 1. ]
|
|
|
|
mean value: 0.935
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 0.97435897 1. 1.
|
|
0.97435897 1. 1. 0.97435897]
|
|
|
|
mean value: 0.9923076923076923
|
|
|
|
key: test_recall
|
|
value: [1. 0.75 0.75 1. 1. 1. 0.8 1. 0.8 0.75]
|
|
|
|
mean value: 0.885
|
|
|
|
key: train_recall
|
|
value: [1. 0.97435897 1. 0.97435897 0.97435897 1.
|
|
1. 1. 1. 0.97435897]
|
|
|
|
mean value: 0.9897435897435898
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.8125 0.875 1. 0.9375 1.
|
|
0.9 1. 0.82857143 0.875 ]
|
|
|
|
mean value: 0.9228571428571428
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98717949 1. 0.97982655 0.98717949 1.
|
|
0.99275362 1. 1. 0.97993311]
|
|
|
|
mean value: 0.992687225391829
|
|
|
|
key: test_jcc
|
|
value: [1. 0.6 0.75 1. 0.8 1.
|
|
0.8 1. 0.66666667 0.75 ]
|
|
|
|
mean value: 0.8366666666666667
|
|
|
|
key: train_jcc
|
|
value: [1. 0.97435897 1. 0.95 0.97435897 1.
|
|
0.97435897 1. 1. 0.95 ]
|
|
|
|
mean value: 0.9823076923076923
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02343273 0.01722765 0.01697659 0.02735806 0.04024768 0.04029274
|
|
0.02456927 0.03989363 0.03921294 0.04148316]
|
|
|
|
mean value: 0.031069445610046386
|
|
|
|
key: score_time
|
|
value: [0.01215196 0.01174808 0.01172161 0.02232957 0.01338077 0.0117867
|
|
0.01184607 0.02344608 0.0225389 0.02162814]
|
|
|
|
mean value: 0.01625778675079346
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0. 0.25 0.15811388 0.15811388 0.15811388
|
|
0.11952286 0.68313005 0.68313005 -0.03857584]
|
|
|
|
mean value: 0.21715487745961196
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.58333333 0.66666667 0.66666667 0.66666667 0.66666667
|
|
0.58333333 0.83333333 0.83333333 0.54545455]
|
|
|
|
mean value: 0.6545454545454545
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.28571429 0.5 0.33333333 0.33333333 0.33333333
|
|
0.44444444 0.75 0.75 0.28571429]
|
|
|
|
mean value: 0.4415873015873016
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.33333333 0.33333333 0.5 0.5 0.5 0.5
|
|
0.5 1. 1. 0.33333333]
|
|
|
|
mean value: 0.55
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.5 0.25 0.5 0.25 0.25 0.25 0.4 0.6 0.6 0.25]
|
|
|
|
mean value: 0.385
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.5 0.625 0.5625 0.5625 0.5625
|
|
0.55714286 0.8 0.8 0.48214286]
|
|
|
|
mean value: 0.5951785714285714
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.16666667 0.33333333 0.2 0.2 0.2
|
|
0.28571429 0.6 0.6 0.16666667]
|
|
|
|
mean value: 0.30023809523809525
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.05
|
|
|
|
Accuracy on Blind test: 0.75
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.22485304 0.20807576 0.21185255 0.21056032 0.20868492 0.21106625
|
|
0.22972059 0.21120667 0.23677921 0.1712718 ]
|
|
|
|
mean value: 0.21240711212158203
|
|
|
|
key: score_time
|
|
value: [0.00908399 0.00889945 0.00889659 0.00886393 0.00904346 0.00892591
|
|
0.00900364 0.0089159 0.00945544 0.0089376 ]
|
|
|
|
mean value: 0.009002590179443359
|
|
|
|
key: test_mcc
|
|
value: [1. 0.625 0.625 0.81649658 0.83666003 1.
|
|
0.65714286 1. 0.65714286 0.60714286]
|
|
|
|
mean value: 0.7824585178890373
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.83333333 0.83333333 0.91666667 0.91666667 1.
|
|
0.83333333 1. 0.83333333 0.81818182]
|
|
|
|
mean value: 0.8984848484848484
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.75 0.75 0.85714286 0.88888889 1.
|
|
0.8 1. 0.8 0.75 ]
|
|
|
|
mean value: 0.8596031746031746
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.75 0.75 1. 0.8 1. 0.8 1. 0.8 0.75]
|
|
|
|
mean value: 0.865
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.75 0.75 0.75 1. 1. 0.8 1. 0.8 0.75]
|
|
|
|
mean value: 0.86
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.8125 0.8125 0.875 0.9375 1.
|
|
0.82857143 1. 0.82857143 0.80357143]
|
|
|
|
mean value: 0.8898214285714285
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.6 0.6 0.75 0.8 1.
|
|
0.66666667 1. 0.66666667 0.6 ]
|
|
|
|
mean value: 0.7683333333333333
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0173173 0.01490331 0.01552176 0.01563334 0.01692104 0.0218811
|
|
0.01528478 0.02272177 0.01535749 0.01550364]
|
|
|
|
mean value: 0.017104554176330566
|
|
|
|
key: score_time
|
|
value: [0.01197457 0.01160645 0.01172018 0.01262617 0.01268053 0.01280403
|
|
0.0124104 0.01255608 0.01246977 0.0125246 ]
|
|
|
|
mean value: 0.012337279319763184
|
|
|
|
key: test_mcc
|
|
value: [ 0.42640143 -0.125 0.15811388 -0.23904572 -0.23904572 0.15811388
|
|
-0.02857143 -0.09759001 0.68313005 -0.17857143]
|
|
|
|
mean value: 0.051793494162056405
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.5 0.66666667 0.41666667 0.41666667 0.66666667
|
|
0.5 0.5 0.83333333 0.45454545]
|
|
|
|
mean value: 0.5704545454545454
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.25 0.33333333 0.22222222 0.22222222 0.33333333
|
|
0.4 0.25 0.75 0.25 ]
|
|
|
|
mean value: 0.3411111111111111
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.25 0.5 0.2 0.2 0.5
|
|
0.4 0.33333333 1. 0.25 ]
|
|
|
|
mean value: 0.4633333333333333
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.25 0.25 0.25 0.25 0.25 0.25 0.4 0.2 0.6 0.25]
|
|
|
|
mean value: 0.295
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.4375 0.5625 0.375 0.375 0.5625
|
|
0.48571429 0.45714286 0.8 0.41071429]
|
|
|
|
mean value: 0.5091071428571429
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.14285714 0.2 0.125 0.125 0.2
|
|
0.25 0.14285714 0.6 0.14285714]
|
|
|
|
mean value: 0.21785714285714286
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.05
|
|
|
|
Accuracy on Blind test: 0.77
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02931094 0.03452849 0.03164291 0.01259708 0.01256371 0.0125587
|
|
0.01257324 0.0173099 0.03186011 0.01252818]
|
|
|
|
mean value: 0.02074732780456543
|
|
|
|
key: score_time
|
|
value: [0.02166176 0.02129889 0.01981616 0.01143384 0.01167202 0.0114224
|
|
0.01144147 0.02194834 0.01146293 0.01149607]
|
|
|
|
mean value: 0.015365386009216308
|
|
|
|
key: test_mcc
|
|
value: [1. 0.40824829 0.11952286 0.81649658 0.35355339 0.625
|
|
0.29277002 0.65714286 0.65714286 0.38575837]
|
|
|
|
mean value: 0.5315635233993806
|
|
|
|
key: train_mcc
|
|
value: [0.95965309 0.97991726 0.9600061 0.95965309 0.95965309 0.95965309
|
|
0.95919146 0.95919146 0.93862091 0.93965322]
|
|
|
|
mean value: 0.9575192777485965
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.75 0.58333333 0.91666667 0.66666667 0.83333333
|
|
0.66666667 0.83333333 0.83333333 0.72727273]
|
|
|
|
mean value: 0.781060606060606
|
|
|
|
key: train_accuracy
|
|
value: [0.98130841 0.99065421 0.98130841 0.98130841 0.98130841 0.98130841
|
|
0.98130841 0.98130841 0.97196262 0.97222222]
|
|
|
|
mean value: 0.9803997923156802
|
|
|
|
key: test_fscore
|
|
value: [1. 0.57142857 0.44444444 0.85714286 0.6 0.75
|
|
0.5 0.8 0.8 0.57142857]
|
|
|
|
mean value: 0.6894444444444445
|
|
|
|
key: train_fscore
|
|
value: [0.97435897 0.98701299 0.97368421 0.97435897 0.97435897 0.97435897
|
|
0.97368421 0.97368421 0.96 0.96103896]
|
|
|
|
mean value: 0.9726540477066793
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 0.4 1. 0.5 0.75
|
|
0.66666667 0.8 0.8 0.66666667]
|
|
|
|
mean value: 0.725
|
|
|
|
key: train_precision
|
|
value: [0.97435897 1. 1. 0.97435897 0.97435897 0.97435897
|
|
0.97368421 0.97368421 0.97297297 0.97368421]
|
|
|
|
mean value: 0.9791461501987818
|
|
|
|
key: test_recall
|
|
value: [1. 0.5 0.5 0.75 0.75 0.75 0.4 0.8 0.8 0.5 ]
|
|
|
|
mean value: 0.675
|
|
|
|
key: train_recall
|
|
value: [0.97435897 0.97435897 0.94871795 0.97435897 0.97435897 0.97435897
|
|
0.97368421 0.97368421 0.94736842 0.94871795]
|
|
|
|
mean value: 0.9663967611336032
|
|
|
|
key: test_roc_auc
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./gid_orig.py:114: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_orig.py:117: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
baseline_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[1. 0.6875 0.5625 0.875 0.6875 0.8125
|
|
0.62857143 0.82857143 0.82857143 0.67857143]
|
|
|
|
mean value: 0.7589285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.97982655 0.98717949 0.97435897 0.97982655 0.97982655 0.97982655
|
|
0.97959573 0.97959573 0.96643783 0.9671126 ]
|
|
|
|
mean value: 0.9773586533715757
|
|
|
|
key: test_jcc
|
|
value: [1. 0.4 0.28571429 0.75 0.42857143 0.6
|
|
0.33333333 0.66666667 0.66666667 0.4 ]
|
|
|
|
mean value: 0.5530952380952381
|
|
|
|
key: train_jcc
|
|
value: [0.95 0.97435897 0.94871795 0.95 0.95 0.95
|
|
0.94871795 0.94871795 0.92307692 0.925 ]
|
|
|
|
mean value: 0.9468589743589744
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.11067224 0.12599802 0.18228793 0.18764257 0.19317532 0.18818402
|
|
0.26005721 0.21987581 0.09147549 0.16718626]
|
|
|
|
mean value: 0.17265548706054687
|
|
|
|
key: score_time
|
|
value: [0.01199269 0.02052832 0.01966453 0.02230549 0.02030396 0.02112412
|
|
0.02278662 0.01183152 0.01173759 0.01176786]
|
|
|
|
mean value: 0.01740427017211914
|
|
|
|
key: test_mcc
|
|
value: [0.63245553 0.40824829 0.11952286 0.81649658 0.35355339 0.35355339
|
|
0.29277002 0.65714286 0.65714286 0.38575837]
|
|
|
|
mean value: 0.46766441566207556
|
|
|
|
key: train_mcc
|
|
value: [0.75771445 0.97991726 0.9600061 0.95965309 0.95965309 1.
|
|
0.95919146 0.95919146 0.93862091 0.93965322]
|
|
|
|
mean value: 0.9413601047309211
|
|
|
|
key: test_accuracy
|
|
value: [0.83333333 0.75 0.58333333 0.91666667 0.66666667 0.66666667
|
|
0.66666667 0.83333333 0.83333333 0.72727273]
|
|
|
|
mean value: 0.7477272727272727
|
|
|
|
key: train_accuracy
|
|
value: [0.88785047 0.99065421 0.98130841 0.98130841 0.98130841 1.
|
|
0.98130841 0.98130841 0.97196262 0.97222222]
|
|
|
|
mean value: 0.9729231568016614
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.57142857 0.44444444 0.85714286 0.6 0.6
|
|
0.5 0.8 0.8 0.57142857]
|
|
|
|
mean value: 0.6411111111111112
|
|
|
|
key: train_fscore
|
|
value: [0.82857143 0.98701299 0.97368421 0.97435897 0.97435897 1.
|
|
0.97368421 0.97368421 0.96 0.96103896]
|
|
|
|
mean value: 0.9606393956920273
|
|
|
|
key: test_precision
|
|
value: [1. 0.66666667 0.4 1. 0.5 0.5
|
|
0.66666667 0.8 0.8 0.66666667]
|
|
|
|
mean value: 0.7
|
|
|
|
key: train_precision
|
|
value: [0.93548387 1. 1. 0.97435897 0.97435897 1.
|
|
0.97368421 0.97368421 0.97297297 0.97368421]
|
|
|
|
mean value: 0.9778227424237611
|
|
|
|
key: test_recall
|
|
value: [0.5 0.5 0.5 0.75 0.75 0.75 0.4 0.8 0.8 0.5 ]
|
|
|
|
mean value: 0.625
|
|
|
|
key: train_recall
|
|
value: [0.74358974 0.97435897 0.94871795 0.97435897 0.97435897 1.
|
|
0.97368421 0.97368421 0.94736842 0.94871795]
|
|
|
|
mean value: 0.9458839406207827
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.6875 0.5625 0.875 0.6875 0.6875
|
|
0.62857143 0.82857143 0.82857143 0.67857143]
|
|
|
|
mean value: 0.7214285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.85708899 0.98717949 0.97435897 0.97982655 0.97982655 1.
|
|
0.97959573 0.97959573 0.96643783 0.9671126 ]
|
|
|
|
mean value: 0.9671022431151655
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.4 0.28571429 0.75 0.42857143 0.42857143
|
|
0.33333333 0.66666667 0.66666667 0.4 ]
|
|
|
|
mean value: 0.4859523809523809
|
|
|
|
key: train_jcc
|
|
value: [0.70731707 0.97435897 0.94871795 0.95 0.95 1.
|
|
0.94871795 0.94871795 0.92307692 0.925 ]
|
|
|
|
mean value: 0.9275906816760475
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02570605 0.02531195 0.02237129 0.02603054 0.03007078 0.02120304
|
|
0.02243948 0.02645206 0.02500463 0.02917624]
|
|
|
|
mean value: 0.025376605987548827
|
|
|
|
key: score_time
|
|
value: [0.01148796 0.01151919 0.01135206 0.01143742 0.01142955 0.01138592
|
|
0.01134014 0.01146054 0.01140571 0.01153827]
|
|
|
|
mean value: 0.011435675621032714
|
|
|
|
key: test_mcc
|
|
value: [0.67419986 0.37796447 0.37796447 0.73214286 0.76376262 0.6000992
|
|
0.60714286 1. 0.73214286 0.6000992 ]
|
|
|
|
mean value: 0.64655183920342
|
|
|
|
key: train_mcc
|
|
value: [0.91176471 0.86774089 0.94323594 0.88320546 0.91281179 0.91240409
|
|
0.88320546 0.91240409 0.91240409 0.89791134]
|
|
|
|
mean value: 0.9037087856483232
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.6875 0.66666667 0.86666667 0.86666667 0.8
|
|
0.8 1. 0.86666667 0.8 ]
|
|
|
|
mean value: 0.8166666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.95588235 0.93382353 0.97080292 0.94160584 0.95620438 0.95620438
|
|
0.94160584 0.95620438 0.95620438 0.94890511]
|
|
|
|
mean value: 0.9517443108630314
|
|
|
|
key: test_fscore
|
|
value: [0.76923077 0.66666667 0.70588235 0.85714286 0.875 0.76923077
|
|
0.8 1. 0.875 0.82352941]
|
|
|
|
mean value: 0.8141682826976945
|
|
|
|
key: train_fscore
|
|
value: [0.95588235 0.93333333 0.97014925 0.94202899 0.95588235 0.95652174
|
|
0.94117647 0.95588235 0.95588235 0.94890511]
|
|
|
|
mean value: 0.951564430354435
|
|
|
|
key: test_precision
|
|
value: [1. 0.71428571 0.6 0.85714286 0.77777778 0.83333333
|
|
0.85714286 1. 0.875 0.77777778]
|
|
|
|
mean value: 0.8292460317460317
|
|
|
|
key: train_precision
|
|
value: [0.95588235 0.94029851 1. 0.94202899 0.97014925 0.95652174
|
|
0.94117647 0.95588235 0.95588235 0.94202899]
|
|
|
|
mean value: 0.9559851000750722
|
|
|
|
key: test_recall
|
|
value: [0.625 0.625 0.85714286 0.85714286 1. 0.71428571
|
|
0.75 1. 0.875 0.875 ]
|
|
|
|
mean value: 0.8178571428571428
|
|
|
|
key: train_recall
|
|
value: [0.95588235 0.92647059 0.94202899 0.94202899 0.94202899 0.95652174
|
|
0.94117647 0.95588235 0.95588235 0.95588235]
|
|
|
|
mean value: 0.9473785166240409
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.6875 0.67857143 0.86607143 0.875 0.79464286
|
|
0.80357143 1. 0.86607143 0.79464286]
|
|
|
|
mean value: 0.8178571428571428
|
|
|
|
key: train_roc_auc
|
|
value: [0.95588235 0.93382353 0.97101449 0.94160273 0.95630861 0.95620205
|
|
0.94160273 0.95620205 0.95620205 0.94895567]
|
|
|
|
mean value: 0.9517796248934357
|
|
|
|
key: test_jcc
|
|
value: [0.625 0.5 0.54545455 0.75 0.77777778 0.625
|
|
0.66666667 1. 0.77777778 0.7 ]
|
|
|
|
mean value: 0.6967676767676767
|
|
|
|
key: train_jcc
|
|
value: [0.91549296 0.875 0.94202899 0.89041096 0.91549296 0.91666667
|
|
0.88888889 0.91549296 0.91549296 0.90277778]
|
|
|
|
mean value: 0.9077745108730605
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.64
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.59272099 0.73302746 0.57024789 0.63847446 0.81539202 0.66339469
|
|
0.66922975 1.00259376 0.63085222 0.65632081]
|
|
|
|
mean value: 0.6972254037857055
|
|
|
|
key: score_time
|
|
value: [0.01941419 0.03988981 0.01182389 0.01283121 0.01295757 0.01300907
|
|
0.02492142 0.0128777 0.01295567 0.01319575]
|
|
|
|
mean value: 0.01738762855529785
|
|
|
|
key: test_mcc
|
|
value: [0.77459667 0.75 0.60714286 1. 0.66143783 0.73214286
|
|
0.64465837 0.87287156 0.32732684 0.875 ]
|
|
|
|
mean value: 0.7245176978811607
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.875 0.8 1. 0.8 0.86666667
|
|
0.8 0.93333333 0.66666667 0.93333333]
|
|
|
|
mean value: 0.855
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.875 0.8 1. 0.82352941 0.85714286
|
|
0.84210526 0.94117647 0.70588235 0.93333333]
|
|
|
|
mean value: 0.863531254607106
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 0.75 1. 0.7 0.85714286
|
|
0.72727273 0.88888889 0.66666667 1. ]
|
|
|
|
mean value: 0.846497113997114
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.875 0.85714286 1. 1. 0.85714286
|
|
1. 1. 0.75 0.875 ]
|
|
|
|
mean value: 0.8964285714285715
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.875 0.80357143 1. 0.8125 0.86607143
|
|
0.78571429 0.92857143 0.66071429 0.9375 ]
|
|
|
|
mean value: 0.8544642857142857
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.77777778 0.66666667 1. 0.7 0.75
|
|
0.72727273 0.88888889 0.54545455 0.875 ]
|
|
|
|
mean value: 0.7681060606060606
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01221609 0.01339197 0.00892854 0.00857139 0.00862551 0.00868034
|
|
0.00853682 0.00880527 0.00853062 0.00853968]
|
|
|
|
mean value: 0.009482622146606445
|
|
|
|
key: score_time
|
|
value: [0.01381898 0.00916982 0.00887036 0.00863338 0.00858545 0.00860238
|
|
0.0085628 0.00857377 0.00856447 0.00868702]
|
|
|
|
mean value: 0.009206843376159669
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0.25819889 0.46428571 0.37796447 0.13363062 -0.04029115
|
|
0.18898224 -0.07142857 0.47245559 0.04029115]
|
|
|
|
mean value: 0.1824088954335891
|
|
|
|
key: train_mcc
|
|
value: [0.42192651 0.42192651 0.39817312 0.37138669 0.42963201 0.44352148
|
|
0.42304382 0.38248522 0.37510519 0.40676841]
|
|
|
|
mean value: 0.40739689589467604
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.625 0.73333333 0.66666667 0.53333333 0.46666667
|
|
0.6 0.46666667 0.73333333 0.53333333]
|
|
|
|
mean value: 0.5858333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.69852941 0.69852941 0.68613139 0.67153285 0.70072993 0.71532847
|
|
0.70072993 0.68613139 0.67153285 0.68613139]
|
|
|
|
mean value: 0.6915306998711893
|
|
|
|
key: test_fscore
|
|
value: [0.6 0.66666667 0.71428571 0.70588235 0.63157895 0.55555556
|
|
0.66666667 0.5 0.77777778 0.63157895]
|
|
|
|
mean value: 0.6449992628630399
|
|
|
|
key: train_fscore
|
|
value: [0.74213836 0.74213836 0.73619632 0.72727273 0.74846626 0.7483871
|
|
0.7388535 0.71523179 0.72392638 0.73619632]
|
|
|
|
mean value: 0.7358807120944473
|
|
|
|
key: test_precision
|
|
value: [0.5 0.6 0.71428571 0.6 0.5 0.45454545
|
|
0.6 0.5 0.7 0.54545455]
|
|
|
|
mean value: 0.5714285714285714
|
|
|
|
key: train_precision
|
|
value: [0.64835165 0.64835165 0.63829787 0.625 0.64893617 0.6744186
|
|
0.65168539 0.65060241 0.62105263 0.63157895]
|
|
|
|
mean value: 0.6438275325752001
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.71428571 0.85714286 0.85714286 0.71428571
|
|
0.75 0.5 0.875 0.75 ]
|
|
|
|
mean value: 0.7517857142857143
|
|
|
|
key: train_recall
|
|
value: [0.86764706 0.86764706 0.86956522 0.86956522 0.88405797 0.84057971
|
|
0.85294118 0.79411765 0.86764706 0.88235294]
|
|
|
|
mean value: 0.85961210571185
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.625 0.73214286 0.67857143 0.55357143 0.48214286
|
|
0.58928571 0.46428571 0.72321429 0.51785714]
|
|
|
|
mean value: 0.5866071428571429
|
|
|
|
key: train_roc_auc
|
|
value: [0.69852941 0.69852941 0.68478261 0.67007673 0.69938193 0.7144075
|
|
0.70183291 0.6869139 0.67295396 0.68755328]
|
|
|
|
mean value: 0.6914961636828644
|
|
|
|
key: test_jcc
|
|
value: [0.42857143 0.5 0.55555556 0.54545455 0.46153846 0.38461538
|
|
0.5 0.33333333 0.63636364 0.46153846]
|
|
|
|
mean value: 0.4806970806970807
|
|
|
|
key: train_jcc
|
|
value: [0.59 0.59 0.58252427 0.57142857 0.59803922 0.59793814
|
|
0.58585859 0.55670103 0.56730769 0.58252427]
|
|
|
|
mean value: 0.5822321784228176
|
|
|
|
MCC on Blind test: 0.01
|
|
|
|
Accuracy on Blind test: 0.38
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00890756 0.00875044 0.00880671 0.00884891 0.00875044 0.008816
|
|
0.0088191 0.0089426 0.00888658 0.00880456]
|
|
|
|
mean value: 0.00883328914642334
|
|
|
|
key: score_time
|
|
value: [0.00866461 0.00874972 0.00858355 0.00862575 0.00877166 0.0087378
|
|
0.00872993 0.00858498 0.00864148 0.00872946]
|
|
|
|
mean value: 0.008681893348693848
|
|
|
|
key: test_mcc
|
|
value: [0. 0.25 0.19642857 0.56407607 0.49099025 0.26189246
|
|
0.19642857 0.47245559 0.46428571 0.64465837]
|
|
|
|
mean value: 0.35412156107816933
|
|
|
|
key: train_mcc
|
|
value: [0.5144434 0.54470478 0.54363493 0.50392153 0.52716245 0.54721488
|
|
0.48424445 0.51877014 0.50571263 0.5019846 ]
|
|
|
|
mean value: 0.5191793798033469
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.625 0.6 0.73333333 0.73333333 0.6
|
|
0.6 0.73333333 0.73333333 0.8 ]
|
|
|
|
mean value: 0.6658333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.75 0.76470588 0.76642336 0.74452555 0.75912409 0.76642336
|
|
0.72992701 0.75182482 0.74452555 0.74452555]
|
|
|
|
mean value: 0.7522005152425933
|
|
|
|
key: test_fscore
|
|
value: [0.6 0.625 0.57142857 0.77777778 0.75 0.66666667
|
|
0.625 0.77777778 0.75 0.84210526]
|
|
|
|
mean value: 0.6985756056808688
|
|
|
|
key: train_fscore
|
|
value: [0.77631579 0.78947368 0.78947368 0.77419355 0.78145695 0.79220779
|
|
0.76433121 0.77631579 0.77124183 0.76821192]
|
|
|
|
mean value: 0.7783222202391937
|
|
|
|
key: test_precision
|
|
value: [0.5 0.625 0.57142857 0.63636364 0.66666667 0.54545455
|
|
0.625 0.7 0.75 0.72727273]
|
|
|
|
mean value: 0.6347186147186147
|
|
|
|
key: train_precision
|
|
value: [0.70238095 0.71428571 0.72289157 0.69767442 0.7195122 0.71764706
|
|
0.6741573 0.70238095 0.69411765 0.69879518]
|
|
|
|
mean value: 0.7043842989015312
|
|
|
|
key: test_recall
|
|
value: [0.75 0.625 0.57142857 1. 0.85714286 0.85714286
|
|
0.625 0.875 0.75 1. ]
|
|
|
|
mean value: 0.7910714285714285
|
|
|
|
key: train_recall
|
|
value: [0.86764706 0.88235294 0.86956522 0.86956522 0.85507246 0.88405797
|
|
0.88235294 0.86764706 0.86764706 0.85294118]
|
|
|
|
mean value: 0.8698849104859335
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.625 0.59821429 0.75 0.74107143 0.61607143
|
|
0.59821429 0.72321429 0.73214286 0.78571429]
|
|
|
|
mean value: 0.6669642857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.75 0.76470588 0.76566496 0.74360614 0.75841858 0.7655584
|
|
0.73103154 0.75266411 0.74541773 0.74531117]
|
|
|
|
mean value: 0.7522378516624041
|
|
|
|
key: test_jcc
|
|
value: [0.42857143 0.45454545 0.4 0.63636364 0.6 0.5
|
|
0.45454545 0.63636364 0.6 0.72727273]
|
|
|
|
mean value: 0.5437662337662338
|
|
|
|
key: train_jcc
|
|
value: [0.6344086 0.65217391 0.65217391 0.63157895 0.64130435 0.65591398
|
|
0.6185567 0.6344086 0.62765957 0.62365591]
|
|
|
|
mean value: 0.6371834493554671
|
|
|
|
MCC on Blind test: 0.09
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00872469 0.00941682 0.0093987 0.0094955 0.00943923 0.00875735
|
|
0.00831437 0.00911641 0.0095017 0.00948215]
|
|
|
|
mean value: 0.009164690971374512
|
|
|
|
key: score_time
|
|
value: [0.00979877 0.01038027 0.01041031 0.0104022 0.01029658 0.01435733
|
|
0.0094676 0.01413035 0.010638 0.01043248]
|
|
|
|
mean value: 0.011031389236450195
|
|
|
|
key: test_mcc
|
|
value: [0.37796447 0.37796447 0.26189246 0.66143783 0.32732684 0.13363062
|
|
0.32732684 0.47245559 0.46428571 0.64465837]
|
|
|
|
mean value: 0.4048943205524593
|
|
|
|
key: train_mcc
|
|
value: [0.56273143 0.60616144 0.62969553 0.61333523 0.55409877 0.65057816
|
|
0.66024878 0.61423342 0.58468637 0.64091263]
|
|
|
|
mean value: 0.6116681772279047
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.6875 0.6 0.8 0.66666667 0.53333333
|
|
0.66666667 0.73333333 0.73333333 0.8 ]
|
|
|
|
mean value: 0.6845833333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.77941176 0.80147059 0.81021898 0.80291971 0.77372263 0.82481752
|
|
0.82481752 0.80291971 0.78832117 0.81751825]
|
|
|
|
mean value: 0.8026137827393731
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.66666667 0.66666667 0.82352941 0.61538462 0.63157895
|
|
0.70588235 0.77777778 0.75 0.84210526]
|
|
|
|
mean value: 0.7206864429000651
|
|
|
|
key: train_fscore
|
|
value: [0.79166667 0.81118881 0.82666667 0.81879195 0.79194631 0.83098592
|
|
0.83783784 0.81632653 0.80272109 0.82758621]
|
|
|
|
mean value: 0.8155717978830668
|
|
|
|
key: test_precision
|
|
value: [0.57142857 0.71428571 0.54545455 0.7 0.66666667 0.5
|
|
0.66666667 0.7 0.75 0.72727273]
|
|
|
|
mean value: 0.6541774891774892
|
|
|
|
key: train_precision
|
|
value: [0.75 0.77333333 0.7654321 0.7625 0.7375 0.80821918
|
|
0.775 0.75949367 0.74683544 0.77922078]
|
|
|
|
mean value: 0.7657534503325787
|
|
|
|
key: test_recall
|
|
value: [1. 0.625 0.85714286 1. 0.57142857 0.85714286
|
|
0.75 0.875 0.75 1. ]
|
|
|
|
mean value: 0.8285714285714285
|
|
|
|
key: train_recall
|
|
value: [0.83823529 0.85294118 0.89855072 0.88405797 0.85507246 0.85507246
|
|
0.91176471 0.88235294 0.86764706 0.88235294]
|
|
|
|
mean value: 0.8728047740835465
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.6875 0.61607143 0.8125 0.66071429 0.55357143
|
|
0.66071429 0.72321429 0.73214286 0.78571429]
|
|
|
|
mean value: 0.6857142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.77941176 0.80147059 0.80956948 0.8023231 0.77312447 0.82459506
|
|
0.82544757 0.80349531 0.78889599 0.81798806]
|
|
|
|
mean value: 0.8026321398124467
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.5 0.5 0.7 0.44444444 0.46153846
|
|
0.54545455 0.63636364 0.6 0.72727273]
|
|
|
|
mean value: 0.5686502386502387
|
|
|
|
key: train_jcc
|
|
value: [0.65517241 0.68235294 0.70454545 0.69318182 0.65555556 0.71084337
|
|
0.72093023 0.68965517 0.67045455 0.70588235]
|
|
|
|
mean value: 0.6888573860114033
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01179934 0.01012301 0.01003814 0.01017427 0.00991893 0.00987267
|
|
0.00998878 0.01008558 0.01007128 0.01131511]
|
|
|
|
mean value: 0.010338711738586425
|
|
|
|
key: score_time
|
|
value: [0.00999904 0.00925827 0.00890064 0.00903559 0.00892496 0.00906014
|
|
0.00908375 0.0089345 0.00948644 0.00994992]
|
|
|
|
mean value: 0.009263324737548827
|
|
|
|
key: test_mcc
|
|
value: [0.40451992 0.5 0.19642857 0.60714286 0.875 0.46428571
|
|
0.76376262 0.73214286 0.49099025 0.6000992 ]
|
|
|
|
mean value: 0.563437198448388
|
|
|
|
key: train_mcc
|
|
value: [0.84051051 0.86774089 0.81031543 0.82480818 0.82480818 0.76668815
|
|
0.83951407 0.81027501 0.83947987 0.88320546]
|
|
|
|
mean value: 0.8307345767586535
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.75 0.6 0.8 0.93333333 0.73333333
|
|
0.86666667 0.86666667 0.73333333 0.8 ]
|
|
|
|
mean value: 0.7770833333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.91911765 0.93382353 0.90510949 0.91240876 0.91240876 0.88321168
|
|
0.91970803 0.90510949 0.91970803 0.94160584]
|
|
|
|
mean value: 0.9152211249463289
|
|
|
|
key: test_fscore
|
|
value: [0.61538462 0.75 0.57142857 0.8 0.93333333 0.71428571
|
|
0.85714286 0.875 0.71428571 0.82352941]
|
|
|
|
mean value: 0.7654390217625512
|
|
|
|
key: train_fscore
|
|
value: [0.91603053 0.93430657 0.90510949 0.91304348 0.91304348 0.88571429
|
|
0.91970803 0.9037037 0.91851852 0.94117647]
|
|
|
|
mean value: 0.9150354556988868
|
|
|
|
key: test_precision
|
|
value: [0.8 0.75 0.57142857 0.75 0.875 0.71428571
|
|
1. 0.875 0.83333333 0.77777778]
|
|
|
|
mean value: 0.7946825396825397
|
|
|
|
key: train_precision
|
|
value: [0.95238095 0.92753623 0.91176471 0.91304348 0.91304348 0.87323944
|
|
0.91304348 0.91044776 0.92537313 0.94117647]
|
|
|
|
mean value: 0.9181049127660313
|
|
|
|
key: test_recall
|
|
value: [0.5 0.75 0.57142857 0.85714286 1. 0.71428571
|
|
0.75 0.875 0.625 0.875 ]
|
|
|
|
mean value: 0.7517857142857143
|
|
|
|
key: train_recall
|
|
value: [0.88235294 0.94117647 0.89855072 0.91304348 0.91304348 0.89855072
|
|
0.92647059 0.89705882 0.91176471 0.94117647]
|
|
|
|
mean value: 0.9123188405797101
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.75 0.59821429 0.80357143 0.9375 0.73214286
|
|
0.875 0.86607143 0.74107143 0.79464286]
|
|
|
|
mean value: 0.7785714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [0.91911765 0.93382353 0.90515772 0.91240409 0.91240409 0.88309889
|
|
0.91975703 0.90505115 0.91965047 0.94160273]
|
|
|
|
mean value: 0.9152067348678602
|
|
|
|
key: test_jcc
|
|
value: [0.44444444 0.6 0.4 0.66666667 0.875 0.55555556
|
|
0.75 0.77777778 0.55555556 0.7 ]
|
|
|
|
mean value: 0.6325
|
|
|
|
key: train_jcc
|
|
value: [0.84507042 0.87671233 0.82666667 0.84 0.84 0.79487179
|
|
0.85135135 0.82432432 0.84931507 0.88888889]
|
|
|
|
mean value: 0.8437200845898511
|
|
|
|
MCC on Blind test: -0.0
|
|
|
|
Accuracy on Blind test: 0.65
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.56858373 0.64376926 0.4885788 0.55860162 0.62052274 0.54586768
|
|
0.57091928 0.53351116 0.68408871 0.53369379]
|
|
|
|
mean value: 0.5748136758804321
|
|
|
|
key: score_time
|
|
value: [0.01217818 0.01202369 0.01197481 0.01199365 0.01205015 0.012012
|
|
0.0120542 0.01204777 0.01204062 0.01203394]
|
|
|
|
mean value: 0.01204090118408203
|
|
|
|
key: test_mcc
|
|
value: [0.51639778 0.62994079 0.19642857 1. 0.66143783 0.73214286
|
|
0.6000992 0.75592895 0.73214286 0.6000992 ]
|
|
|
|
mean value: 0.642461802363988
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.8125 0.6 1. 0.8 0.86666667
|
|
0.8 0.86666667 0.86666667 0.8 ]
|
|
|
|
mean value: 0.81625
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.8 0.57142857 1. 0.82352941 0.85714286
|
|
0.82352941 0.88888889 0.875 0.82352941]
|
|
|
|
mean value: 0.8177334267040149
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.85714286 0.57142857 1. 0.7 0.85714286
|
|
0.77777778 0.8 0.875 0.77777778]
|
|
|
|
mean value: 0.8049603174603175
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.625 0.75 0.57142857 1. 1. 0.85714286
|
|
0.875 1. 0.875 0.875 ]
|
|
|
|
mean value: 0.8428571428571429
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.8125 0.59821429 1. 0.8125 0.86607143
|
|
0.79464286 0.85714286 0.86607143 0.79464286]
|
|
|
|
mean value: 0.8151785714285714
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.66666667 0.4 1. 0.7 0.75
|
|
0.7 0.8 0.77777778 0.7 ]
|
|
|
|
mean value: 0.705
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01505685 0.01453471 0.01128817 0.01089859 0.01046872 0.01097035
|
|
0.01134324 0.0110693 0.0112195 0.01129436]
|
|
|
|
mean value: 0.011814379692077636
|
|
|
|
key: score_time
|
|
value: [0.01152134 0.00902057 0.00866437 0.00855374 0.00851226 0.00853801
|
|
0.00877357 0.00879598 0.00872588 0.00893998]
|
|
|
|
mean value: 0.009004569053649903
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.75 0.875 0.875 0.73214286 0.87287156
|
|
1. 1. 0.73214286 0.87287156]
|
|
|
|
mean value: 0.859194593986185
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.875 0.93333333 0.93333333 0.86666667 0.93333333
|
|
1. 1. 0.86666667 0.93333333]
|
|
|
|
mean value: 0.9279166666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.94117647 0.875 0.93333333 0.93333333 0.85714286 0.92307692
|
|
1. 1. 0.875 0.94117647]
|
|
|
|
mean value: 0.9279239388062918
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.875 0.875 0.875 0.85714286 1.
|
|
1. 1. 0.875 0.88888889]
|
|
|
|
mean value: 0.9134920634920635
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.875 1. 1. 0.85714286 0.85714286
|
|
1. 1. 0.875 1. ]
|
|
|
|
mean value: 0.9464285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.875 0.9375 0.9375 0.86607143 0.92857143
|
|
1. 1. 0.86607143 0.92857143]
|
|
|
|
mean value: 0.9276785714285715
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.88888889 0.77777778 0.875 0.875 0.75 0.85714286
|
|
1. 1. 0.77777778 0.88888889]
|
|
|
|
mean value: 0.8690476190476191
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08812714 0.08732772 0.08859634 0.08848572 0.08767033 0.08787918
|
|
0.08768225 0.08852649 0.08794308 0.08818316]
|
|
|
|
mean value: 0.08804214000701904
|
|
|
|
key: score_time
|
|
value: [0.01718473 0.01735854 0.01743412 0.01702428 0.01767778 0.01741862
|
|
0.01739454 0.01736569 0.01758718 0.01760912]
|
|
|
|
mean value: 0.01740546226501465
|
|
|
|
key: test_mcc
|
|
value: [0.77459667 0.37796447 0.49099025 0.73214286 0.875 0.73214286
|
|
0.73214286 0.875 0.66143783 0.32732684]
|
|
|
|
mean value: 0.6578744629830401
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.6875 0.73333333 0.86666667 0.93333333 0.86666667
|
|
0.86666667 0.93333333 0.8 0.66666667]
|
|
|
|
mean value: 0.8229166666666666
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.66666667 0.75 0.85714286 0.93333333 0.85714286
|
|
0.875 0.93333333 0.76923077 0.70588235]
|
|
|
|
mean value: 0.820487502693385
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.71428571 0.66666667 0.85714286 0.875 0.85714286
|
|
0.875 1. 1. 0.66666667]
|
|
|
|
mean value: 0.8511904761904762
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.625 0.85714286 0.85714286 1. 0.85714286
|
|
0.875 0.875 0.625 0.75 ]
|
|
|
|
mean value: 0.8071428571428572
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.6875 0.74107143 0.86607143 0.9375 0.86607143
|
|
0.86607143 0.9375 0.8125 0.66071429]
|
|
|
|
mean value: 0.825
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.5 0.6 0.75 0.875 0.75
|
|
0.77777778 0.875 0.625 0.54545455]
|
|
|
|
mean value: 0.7048232323232323
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00902843 0.00919366 0.00891805 0.00889707 0.00913405 0.0090549
|
|
0.00896406 0.00876498 0.00908923 0.00890517]
|
|
|
|
mean value: 0.008994960784912109
|
|
|
|
key: score_time
|
|
value: [0.0090282 0.00873184 0.00872302 0.00881553 0.00873852 0.00867462
|
|
0.00864935 0.00869679 0.00871325 0.00864553]
|
|
|
|
mean value: 0.008741664886474609
|
|
|
|
key: test_mcc
|
|
value: [0.51639778 0.37796447 0.49099025 0.09449112 0.60714286 0.19642857
|
|
0.33928571 0.32732684 0.6000992 0.46428571]
|
|
|
|
mean value: 0.40144125144326637
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.6875 0.73333333 0.53333333 0.8 0.6
|
|
0.66666667 0.66666667 0.8 0.73333333]
|
|
|
|
mean value: 0.6970833333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.66666667 0.75 0.58823529 0.8 0.57142857
|
|
0.66666667 0.70588235 0.82352941 0.75 ]
|
|
|
|
mean value: 0.7100186741363211
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.7 0.71428571 0.66666667 0.5 0.75 0.57142857
|
|
0.71428571 0.66666667 0.77777778 0.75 ]
|
|
|
|
mean value: 0.6811111111111111
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.625 0.85714286 0.71428571 0.85714286 0.57142857
|
|
0.625 0.75 0.875 0.75 ]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.6875 0.74107143 0.54464286 0.80357143 0.59821429
|
|
0.66964286 0.66071429 0.79464286 0.73214286]
|
|
|
|
mean value: 0.6982142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.5 0.6 0.41666667 0.66666667 0.4
|
|
0.5 0.54545455 0.7 0.6 ]
|
|
|
|
mean value: 0.5565151515151515
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.16274762 1.21323156 1.15071106 1.15295625 1.14983153 1.14728713
|
|
1.15696621 1.16246438 1.17218447 1.15218377]
|
|
|
|
mean value: 1.1620563983917236
|
|
|
|
key: score_time
|
|
value: [0.08934927 0.08911204 0.08818769 0.0877502 0.08845139 0.0878191
|
|
0.08764696 0.09043884 0.09330297 0.0872798 ]
|
|
|
|
mean value: 0.08893382549285889
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.62994079 0.60714286 1. 0.875 0.75592895
|
|
0.73214286 0.875 0.76376262 0.46428571]
|
|
|
|
mean value: 0.7585120882452765
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.8125 0.8 1. 0.93333333 0.86666667
|
|
0.86666667 0.93333333 0.86666667 0.73333333]
|
|
|
|
mean value: 0.875
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.8 0.8 1. 0.93333333 0.83333333
|
|
0.875 0.93333333 0.85714286 0.75 ]
|
|
|
|
mean value: 0.871547619047619
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.85714286 0.75 1. 0.875 1.
|
|
0.875 1. 1. 0.75 ]
|
|
|
|
mean value: 0.9107142857142857
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.75 0.85714286 1. 1. 0.71428571
|
|
0.875 0.875 0.75 0.75 ]
|
|
|
|
mean value: 0.8446428571428571
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.8125 0.80357143 1. 0.9375 0.85714286
|
|
0.86607143 0.9375 0.875 0.73214286]
|
|
|
|
mean value: 0.8758928571428571
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
[0.875 0.66666667 0.66666667 1. 0.875 0.71428571
|
|
0.77777778 0.875 0.75 0.6 ]
|
|
|
|
mean value: 0.7800396825396825
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.85972571 0.85355878 0.91043258 0.87334061 0.88241315 0.88078475
|
|
0.8900435 0.86408782 0.90165615 0.90448761]
|
|
|
|
mean value: 0.8820530652999878
|
|
|
|
key: score_time
|
|
value: [0.1766603 0.22412515 0.13609123 0.21713638 0.24007654 0.21767449
|
|
0.19926095 0.18716145 0.25138688 0.22168207]
|
|
|
|
mean value: 0.20712554454803467
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.62994079 0.49099025 1. 0.875 0.75592895
|
|
0.60714286 0.875 0.76376262 0.76376262]
|
|
|
|
mean value: 0.764344517988115
|
|
|
|
key: train_mcc
|
|
value: [0.97100831 0.97100831 0.98550725 0.97122151 0.97122151 0.97122151
|
|
0.97120941 0.95710706 0.98550418 0.95629932]
|
|
|
|
mean value: 0.9711308369300188
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.8125 0.73333333 1. 0.93333333 0.86666667
|
|
0.8 0.93333333 0.86666667 0.86666667]
|
|
|
|
mean value: 0.875
|
|
|
|
key: train_accuracy
|
|
value: [0.98529412 0.98529412 0.99270073 0.98540146 0.98540146 0.98540146
|
|
0.98540146 0.97810219 0.99270073 0.97810219]
|
|
|
|
mean value: 0.9853799914126234
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.8 0.75 1. 0.93333333 0.83333333
|
|
0.8 0.93333333 0.85714286 0.85714286]
|
|
|
|
mean value: 0.8697619047619047
|
|
|
|
key: train_fscore
|
|
value: [0.98507463 0.98507463 0.99270073 0.98529412 0.98529412 0.98529412
|
|
0.98507463 0.97744361 0.99259259 0.97777778]
|
|
|
|
mean value: 0.9851620942858126
|
|
|
|
key: test_precision
|
|
value: [1. 0.85714286 0.66666667 1. 0.875 1.
|
|
0.85714286 1. 1. 1. ]
|
|
|
|
mean value: 0.9255952380952381
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.98507463]
|
|
|
|
mean value: 0.9985074626865672
|
|
|
|
key: test_recall
|
|
value: [0.875 0.75 0.85714286 1. 1. 0.71428571
|
|
0.75 0.875 0.75 0.75 ]
|
|
|
|
mean value: 0.8321428571428572
|
|
|
|
key: train_recall
|
|
value: [0.97058824 0.97058824 0.98550725 0.97101449 0.97101449 0.97101449
|
|
0.97058824 0.95588235 0.98529412 0.97058824]
|
|
|
|
mean value: 0.9722080136402387
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.8125 0.74107143 1. 0.9375 0.85714286
|
|
0.80357143 0.9375 0.875 0.875 ]
|
|
|
|
mean value: 0.8776785714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.98529412 0.98529412 0.99275362 0.98550725 0.98550725 0.98550725
|
|
0.98529412 0.97794118 0.99264706 0.97804774]
|
|
|
|
mean value: 0.98537936913896
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.66666667 0.6 1. 0.875 0.71428571
|
|
0.66666667 0.875 0.75 0.75 ]
|
|
|
|
mean value: 0.7772619047619047
|
|
|
|
key: train_jcc
|
|
value: [0.97058824 0.97058824 0.98550725 0.97101449 0.97101449 0.97101449
|
|
0.97058824 0.95588235 0.98529412 0.95652174]
|
|
|
|
mean value: 0.9708013640238704
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.81
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02130222 0.00860238 0.00862336 0.00866222 0.00853252 0.00857735
|
|
0.00868011 0.00864792 0.00861239 0.00864911]
|
|
|
|
mean value: 0.009888958930969239
|
|
|
|
key: score_time
|
|
value: [0.01332974 0.00847292 0.00869036 0.00839376 0.00838685 0.00843501
|
|
0.00849009 0.00850749 0.00847769 0.00854087]
|
|
|
|
mean value: 0.008972477912902833
|
|
|
|
key: test_mcc
|
|
value: [0. 0.25 0.19642857 0.56407607 0.49099025 0.26189246
|
|
0.19642857 0.47245559 0.46428571 0.64465837]
|
|
|
|
mean value: 0.35412156107816933
|
|
|
|
key: train_mcc
|
|
value: [0.5144434 0.54470478 0.54363493 0.50392153 0.52716245 0.54721488
|
|
0.48424445 0.51877014 0.50571263 0.5019846 ]
|
|
|
|
mean value: 0.5191793798033469
|
|
|
|
key: test_accuracy
|
|
value: [0.5 0.625 0.6 0.73333333 0.73333333 0.6
|
|
0.6 0.73333333 0.73333333 0.8 ]
|
|
|
|
mean value: 0.6658333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.75 0.76470588 0.76642336 0.74452555 0.75912409 0.76642336
|
|
0.72992701 0.75182482 0.74452555 0.74452555]
|
|
|
|
mean value: 0.7522005152425933
|
|
|
|
key: test_fscore
|
|
value: [0.6 0.625 0.57142857 0.77777778 0.75 0.66666667
|
|
0.625 0.77777778 0.75 0.84210526]
|
|
|
|
mean value: 0.6985756056808688
|
|
|
|
key: train_fscore
|
|
value: [0.77631579 0.78947368 0.78947368 0.77419355 0.78145695 0.79220779
|
|
0.76433121 0.77631579 0.77124183 0.76821192]
|
|
|
|
mean value: 0.7783222202391937
|
|
|
|
key: test_precision
|
|
value: [0.5 0.625 0.57142857 0.63636364 0.66666667 0.54545455
|
|
0.625 0.7 0.75 0.72727273]
|
|
|
|
mean value: 0.6347186147186147
|
|
|
|
key: train_precision
|
|
value: [0.70238095 0.71428571 0.72289157 0.69767442 0.7195122 0.71764706
|
|
0.6741573 0.70238095 0.69411765 0.69879518]
|
|
|
|
mean value: 0.7043842989015312
|
|
|
|
key: test_recall
|
|
value: [0.75 0.625 0.57142857 1. 0.85714286 0.85714286
|
|
0.625 0.875 0.75 1. ]
|
|
|
|
mean value: 0.7910714285714285
|
|
|
|
key: train_recall
|
|
value: [0.86764706 0.88235294 0.86956522 0.86956522 0.85507246 0.88405797
|
|
0.88235294 0.86764706 0.86764706 0.85294118]
|
|
|
|
mean value: 0.8698849104859335
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.625 0.59821429 0.75 0.74107143 0.61607143
|
|
0.59821429 0.72321429 0.73214286 0.78571429]
|
|
|
|
mean value: 0.6669642857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.75 0.76470588 0.76566496 0.74360614 0.75841858 0.7655584
|
|
0.73103154 0.75266411 0.74541773 0.74531117]
|
|
|
|
mean value: 0.7522378516624041
|
|
|
|
key: test_jcc
|
|
value: [0.42857143 0.45454545 0.4 0.63636364 0.6 0.5
|
|
0.45454545 0.63636364 0.6 0.72727273]
|
|
|
|
mean value: 0.5437662337662338
|
|
|
|
key: train_jcc
|
|
value: [0.6344086 0.65217391 0.65217391 0.63157895 0.64130435 0.65591398
|
|
0.6185567 0.6344086 0.62765957 0.62365591]
|
|
|
|
mean value: 0.6371834493554671
|
|
|
|
MCC on Blind test: 0.09
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.04765677 0.15343904 0.03782272 0.03994322 0.04357862 0.04628658
|
|
0.04569268 0.04593277 0.04655242 0.04481316]
|
|
|
|
mean value: 0.055171799659729
|
|
|
|
key: score_time
|
|
value: [0.01268363 0.01065946 0.01116037 0.01030374 0.01018286 0.01021385
|
|
0.01012373 0.01024175 0.01021934 0.0104804 ]
|
|
|
|
mean value: 0.010626912117004395
|
|
|
|
key: test_mcc
|
|
value: [1. 0.75 1. 1. 0.73214286 1.
|
|
0.87287156 1. 0.73214286 0.76376262]
|
|
|
|
mean value: 0.8850919891055657
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.875 1. 1. 0.86666667 1.
|
|
0.93333333 1. 0.86666667 0.86666667]
|
|
|
|
mean value: 0.9408333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.875 1. 1. 0.85714286 1.
|
|
0.94117647 1. 0.875 0.85714286]
|
|
|
|
mean value: 0.940546218487395
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 1. 1. 0.85714286 1.
|
|
0.88888889 1. 0.875 1. ]
|
|
|
|
mean value: 0.9496031746031746
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.875 1. 1. 0.85714286 1.
|
|
1. 1. 0.875 0.75 ]
|
|
|
|
mean value: 0.9357142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.875 1. 1. 0.86607143 1.
|
|
0.92857143 1. 0.86607143 0.875 ]
|
|
|
|
mean value: 0.9410714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.77777778 1. 1. 0.75 1.
|
|
0.88888889 1. 0.77777778 0.75 ]
|
|
|
|
mean value: 0.8944444444444445
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02869916 0.05144167 0.04765964 0.04727054 0.04757094 0.04816079
|
|
0.04739809 0.04720759 0.04738903 0.04750156]
|
|
|
|
mean value: 0.0460299015045166
|
|
|
|
key: score_time
|
|
value: [0.02280951 0.02008939 0.02200198 0.01993203 0.02164054 0.02243447
|
|
0.02050519 0.02272391 0.02087831 0.02276683]
|
|
|
|
mean value: 0.021578216552734376
|
|
|
|
key: test_mcc
|
|
value: [0.5 0.62994079 0.37796447 0.36689969 0.13363062 0.37796447
|
|
0.33928571 0.34247476 0.32732684 0.6000992 ]
|
|
|
|
mean value: 0.3995586555675519
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.8125 0.66666667 0.6 0.53333333 0.66666667
|
|
0.66666667 0.66666667 0.66666667 0.8 ]
|
|
|
|
mean value: 0.6829166666666666
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.82352941 0.70588235 0.7 0.63157895 0.70588235
|
|
0.66666667 0.73684211 0.70588235 0.82352941]
|
|
|
|
mean value: 0.7249793601651187
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.77777778 0.6 0.53846154 0.5 0.6
|
|
0.71428571 0.63636364 0.66666667 0.77777778]
|
|
|
|
mean value: 0.6561333111333111
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.875 0.85714286 1. 0.85714286 0.85714286
|
|
0.625 0.875 0.75 0.875 ]
|
|
|
|
mean value: 0.8321428571428571
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.8125 0.67857143 0.625 0.55357143 0.67857143
|
|
0.66964286 0.65178571 0.66071429 0.79464286]
|
|
|
|
mean value: 0.6875
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.7 0.54545455 0.53846154 0.46153846 0.54545455
|
|
0.5 0.58333333 0.54545455 0.7 ]
|
|
|
|
mean value: 0.5719696969696969
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.59
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0237956 0.00884223 0.00858641 0.0084517 0.00839496 0.00843644
|
|
0.00836205 0.00869083 0.00836253 0.00843644]
|
|
|
|
mean value: 0.01003592014312744
|
|
|
|
key: score_time
|
|
value: [0.00963569 0.00868869 0.00838709 0.00837803 0.00836611 0.00832963
|
|
0.00844836 0.0084126 0.00840187 0.00849652]
|
|
|
|
mean value: 0.008554458618164062
|
|
|
|
key: test_mcc
|
|
value: [ 0.25 0.37796447 0.49099025 0.60714286 0.76376262 -0.04029115
|
|
0.32732684 -0.07142857 0.07142857 0.47245559]
|
|
|
|
mean value: 0.3249351477423294
|
|
|
|
key: train_mcc
|
|
value: [0.36066785 0.35682062 0.41648963 0.30654574 0.33012155 0.37638937
|
|
0.34831003 0.39368469 0.40740412 0.36064115]
|
|
|
|
mean value: 0.36570747455665553
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.6875 0.73333333 0.8 0.86666667 0.46666667
|
|
0.66666667 0.46666667 0.53333333 0.73333333]
|
|
|
|
mean value: 0.6579166666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.67647059 0.67647059 0.7080292 0.64963504 0.66423358 0.68613139
|
|
0.67153285 0.69343066 0.70072993 0.67883212]
|
|
|
|
mean value: 0.6805495920996136
|
|
|
|
key: test_fscore
|
|
value: [0.625 0.70588235 0.75 0.8 0.875 0.55555556
|
|
0.70588235 0.5 0.53333333 0.77777778]
|
|
|
|
mean value: 0.682843137254902
|
|
|
|
key: train_fscore
|
|
value: [0.70666667 0.69863014 0.71830986 0.68831169 0.68493151 0.7114094
|
|
0.69387755 0.71621622 0.72108844 0.69444444]
|
|
|
|
mean value: 0.7033885900997274
|
|
|
|
key: test_precision
|
|
value: [0.625 0.66666667 0.66666667 0.75 0.77777778 0.45454545
|
|
0.66666667 0.5 0.57142857 0.7 ]
|
|
|
|
mean value: 0.6378751803751803
|
|
|
|
key: train_precision
|
|
value: [0.64634146 0.65384615 0.69863014 0.62352941 0.64935065 0.6625
|
|
0.64556962 0.6625 0.67088608 0.65789474]
|
|
|
|
mean value: 0.6571048248407082
|
|
|
|
key: test_recall
|
|
value: [0.625 0.75 0.85714286 0.85714286 1. 0.71428571
|
|
0.75 0.5 0.5 0.875 ]
|
|
|
|
mean value: 0.7428571428571429
|
|
|
|
key: train_recall
|
|
value: [0.77941176 0.75 0.73913043 0.76811594 0.72463768 0.76811594
|
|
0.75 0.77941176 0.77941176 0.73529412]
|
|
|
|
mean value: 0.7573529411764706
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.6875 0.74107143 0.80357143 0.875 0.48214286
|
|
0.66071429 0.46428571 0.53571429 0.72321429]
|
|
|
|
mean value: 0.6598214285714286
|
|
|
|
key: train_roc_auc
|
|
value: [0.67647059 0.67647059 0.70780051 0.64876385 0.66378943 0.68552856
|
|
0.67210145 0.69405371 0.70130009 0.67924126]
|
|
|
|
mean value: 0.6805520034100596
|
|
|
|
key: test_jcc
|
|
value: [0.45454545 0.54545455 0.6 0.66666667 0.77777778 0.38461538
|
|
0.54545455 0.33333333 0.36363636 0.63636364]
|
|
|
|
mean value: 0.5307847707847708
|
|
|
|
key: train_jcc
|
|
value: [0.54639175 0.53684211 0.56043956 0.52475248 0.52083333 0.55208333
|
|
0.53125 0.55789474 0.56382979 0.53191489]
|
|
|
|
mean value: 0.5426231977887398
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.49
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01028538 0.01399684 0.01461887 0.01369882 0.01696754 0.01587057
|
|
0.01506424 0.0152986 0.01567125 0.01478696]
|
|
|
|
mean value: 0.014625906944274902
|
|
|
|
key: score_time
|
|
value: [0.00841475 0.01135182 0.01127791 0.01128936 0.01133323 0.01156378
|
|
0.01150846 0.01161075 0.01152873 0.01164389]
|
|
|
|
mean value: 0.011152267456054688
|
|
|
|
key: test_mcc
|
|
value: [0.51639778 0.62994079 0.60714286 1. 0.6000992 0.73214286
|
|
0.41931393 0.75592895 0.73214286 0.73214286]
|
|
|
|
mean value: 0.6725252075270663
|
|
|
|
key: train_mcc
|
|
value: [0.92737353 0.97058824 0.97122151 0.92710997 0.92951942 0.97080136
|
|
0.68283343 0.90259957 0.97080136 0.95630861]
|
|
|
|
mean value: 0.920915701445411
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.8125 0.8 1. 0.8 0.86666667
|
|
0.66666667 0.86666667 0.86666667 0.86666667]
|
|
|
|
mean value: 0.8295833333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.96323529 0.98529412 0.98540146 0.96350365 0.96350365 0.98540146
|
|
0.81751825 0.94890511 0.98540146 0.97810219]
|
|
|
|
mean value: 0.9576266638042078
|
|
|
|
key: test_fscore
|
|
value: [0.71428571 0.82352941 0.8 1. 0.76923077 0.85714286
|
|
0.76190476 0.88888889 0.875 0.875 ]
|
|
|
|
mean value: 0.8364982403217698
|
|
|
|
key: train_fscore
|
|
value: [0.96402878 0.98529412 0.98529412 0.96350365 0.96240602 0.98550725
|
|
0.8447205 0.95104895 0.98529412 0.97810219]
|
|
|
|
mean value: 0.9605199678693419
|
|
|
|
key: test_precision
|
|
value: [0.83333333 0.77777778 0.75 1. 0.83333333 0.85714286
|
|
0.61538462 0.8 0.875 0.875 ]
|
|
|
|
mean value: 0.8216971916971917
|
|
|
|
key: train_precision
|
|
value: [0.94366197 0.98529412 1. 0.97058824 1. 0.98550725
|
|
0.7311828 0.90666667 0.98529412 0.97101449]
|
|
|
|
mean value: 0.9479209643915247
|
|
|
|
key: test_recall
|
|
value: [0.625 0.875 0.85714286 1. 0.71428571 0.85714286
|
|
1. 1. 0.875 0.875 ]
|
|
|
|
mean value: 0.8678571428571429
|
|
|
|
key: train_recall
|
|
value: [0.98529412 0.98529412 0.97101449 0.95652174 0.92753623 0.98550725
|
|
1. 1. 0.98529412 0.98529412]
|
|
|
|
mean value: 0.9781756180733163
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.8125 0.80357143 1. 0.79464286 0.86607143
|
|
0.64285714 0.85714286 0.86607143 0.86607143]
|
|
|
|
mean value: 0.8258928571428571
|
|
|
|
key: train_roc_auc
|
|
value: [0.96323529 0.98529412 0.98550725 0.96355499 0.96376812 0.98540068
|
|
0.81884058 0.94927536 0.98540068 0.97815431]
|
|
|
|
mean value: 0.957843137254902
|
|
|
|
key: test_jcc
|
|
value: [0.55555556 0.7 0.66666667 1. 0.625 0.75
|
|
0.61538462 0.8 0.77777778 0.77777778]
|
|
|
|
mean value: 0.7268162393162393
|
|
|
|
key: train_jcc
|
|
value: [0.93055556 0.97101449 0.97101449 0.92957746 0.92753623 0.97142857
|
|
0.7311828 0.90666667 0.97101449 0.95714286]
|
|
|
|
mean value: 0.9267133621426236
|
|
|
|
MCC on Blind test: 0.16
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01456738 0.01306653 0.01286817 0.01258302 0.01323724 0.01244307
|
|
0.01299739 0.0136261 0.01343799 0.01282883]
|
|
|
|
mean value: 0.013165569305419922
|
|
|
|
key: score_time
|
|
value: [0.01136112 0.01127458 0.01129842 0.01130366 0.01122713 0.01127219
|
|
0.01125145 0.01158643 0.01138997 0.01129389]
|
|
|
|
mean value: 0.011325883865356445
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.51639778 0.26189246 0.46770717 0.73214286 0.20044593
|
|
0.60714286 1. 0.60714286 0.76376262]
|
|
|
|
mean value: 0.5786575323186889
|
|
|
|
key: train_mcc
|
|
value: [0.88852332 0.82402205 0.94318882 0.41647809 0.78854812 0.37364893
|
|
0.92787101 0.86311873 0.94318882 0.86311873]
|
|
|
|
mean value: 0.7831706632681725
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.75 0.6 0.66666667 0.86666667 0.6
|
|
0.8 1. 0.8 0.86666667]
|
|
|
|
mean value: 0.77625
|
|
|
|
key: train_accuracy
|
|
value: [0.94117647 0.90441176 0.97080292 0.64963504 0.88321168 0.62043796
|
|
0.96350365 0.9270073 0.97080292 0.9270073 ]
|
|
|
|
mean value: 0.8757996994418206
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.77777778 0.66666667 0.73684211 0.85714286 0.4
|
|
0.8 1. 0.8 0.85714286]
|
|
|
|
mean value: 0.7695572263993316
|
|
|
|
key: train_fscore
|
|
value: [0.94444444 0.91275168 0.97183099 0.74193548 0.86885246 0.39534884
|
|
0.96240602 0.92063492 0.96969697 0.92063492]
|
|
|
|
mean value: 0.8608536714313355
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.7 0.54545455 0.58333333 0.85714286 0.66666667
|
|
0.85714286 1. 0.85714286 1. ]
|
|
|
|
mean value: 0.7924025974025973
|
|
|
|
key: train_precision
|
|
value: [0.89473684 0.83950617 0.94520548 0.58974359 1. 1.
|
|
0.98461538 1. 1. 1. ]
|
|
|
|
mean value: 0.9253807468755798
|
|
|
|
key: test_recall
|
|
value: [0.75 0.875 0.85714286 1. 0.85714286 0.28571429
|
|
0.75 1. 0.75 0.75 ]
|
|
|
|
mean value: 0.7875
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 0.76811594 0.24637681
|
|
0.94117647 0.85294118 0.94117647 0.85294118]
|
|
|
|
mean value: 0.8602728047740835
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.75 0.61607143 0.6875 0.86607143 0.58035714
|
|
0.80357143 1. 0.80357143 0.875 ]
|
|
|
|
mean value: 0.7794642857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.94117647 0.90441176 0.97058824 0.64705882 0.88405797 0.62318841
|
|
0.96334186 0.92647059 0.97058824 0.92647059]
|
|
|
|
mean value: 0.8757352941176471
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.63636364 0.5 0.58333333 0.75 0.25
|
|
0.66666667 1. 0.66666667 0.75 ]
|
|
|
|
mean value: 0.646969696969697
|
|
|
|
key: train_jcc
|
|
value: [0.89473684 0.83950617 0.94520548 0.58974359 0.76811594 0.24637681
|
|
0.92753623 0.85294118 0.94117647 0.85294118]
|
|
|
|
mean value: 0.7858279893177071
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.11904359 0.10382509 0.10486364 0.10416913 0.10492015 0.10402918
|
|
0.10552669 0.10339594 0.10672402 0.10487676]
|
|
|
|
mean value: 0.10613741874694824
|
|
|
|
key: score_time
|
|
value: [0.01462102 0.01453567 0.01501799 0.01468349 0.01466584 0.01468468
|
|
0.01471519 0.01495481 0.01457691 0.01460719]
|
|
|
|
mean value: 0.014706277847290039
|
|
|
|
key: test_mcc
|
|
value: [1. 0.75 0.76376262 1. 0.73214286 0.87287156
|
|
0.75592895 1. 0.73214286 0.60714286]
|
|
|
|
mean value: 0.8213991694216969
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.875 0.86666667 1. 0.86666667 0.93333333
|
|
0.86666667 1. 0.86666667 0.8 ]
|
|
|
|
mean value: 0.9075
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.875 0.875 1. 0.85714286 0.92307692
|
|
0.88888889 1. 0.875 0.8 ]
|
|
|
|
mean value: 0.9094108669108669
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 0.77777778 1. 0.85714286 1.
|
|
0.8 1. 0.875 0.85714286]
|
|
|
|
mean value: 0.9042063492063492
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.875 1. 1. 0.85714286 0.85714286
|
|
1. 1. 0.875 0.75 ]
|
|
|
|
mean value: 0.9214285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.875 0.875 1. 0.86607143 0.92857143
|
|
0.85714286 1. 0.86607143 0.80357143]
|
|
|
|
mean value: 0.9071428571428571
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.77777778 0.77777778 1. 0.75 0.85714286
|
|
0.8 1. 0.77777778 0.66666667]
|
|
|
|
mean value: 0.8407142857142857
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.74
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03904724 0.03519297 0.03736401 0.05492711 0.04780269 0.04455471
|
|
0.0496397 0.02868819 0.02949524 0.03737569]
|
|
|
|
mean value: 0.040408754348754884
|
|
|
|
key: score_time
|
|
value: [0.02148342 0.03485632 0.02557969 0.03648186 0.01914907 0.03731179
|
|
0.02533007 0.02252865 0.01633787 0.03448296]
|
|
|
|
mean value: 0.027354168891906738
|
|
|
|
key: test_mcc
|
|
value: [1. 0.75 1. 1. 0.73214286 0.73214286
|
|
0.87287156 1. 0.73214286 0.76376262]
|
|
|
|
mean value: 0.8583062748198514
|
|
|
|
key: train_mcc
|
|
value: [0.98540068 1. 0.98550725 1. 0.98550725 1.
|
|
1. 0.98550725 1. 1. ]
|
|
|
|
mean value: 0.9941922415380265
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.875 1. 1. 0.86666667 0.86666667
|
|
0.93333333 1. 0.86666667 0.86666667]
|
|
|
|
mean value: 0.9275
|
|
|
|
key: train_accuracy
|
|
value: [0.99264706 1. 0.99270073 1. 0.99270073 1.
|
|
1. 0.99270073 1. 1. ]
|
|
|
|
mean value: 0.9970749248604551
|
|
|
|
key: test_fscore
|
|
value: [1. 0.875 1. 1. 0.85714286 0.85714286
|
|
0.94117647 1. 0.875 0.85714286]
|
|
|
|
mean value: 0.9262605042016807
|
|
|
|
key: train_fscore
|
|
value: [0.99259259 1. 0.99270073 1. 0.99270073 1.
|
|
1. 0.99270073 1. 1. ]
|
|
|
|
mean value: 0.9970694782373615
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 1. 1. 0.85714286 0.85714286
|
|
0.88888889 1. 0.875 1. ]
|
|
|
|
mean value: 0.9353174603174603
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 0.98550725 1. 1. ]
|
|
|
|
mean value: 0.9985507246376811
|
|
|
|
key: test_recall
|
|
value: [1. 0.875 1. 1. 0.85714286 0.85714286
|
|
1. 1. 0.875 0.75 ]
|
|
|
|
mean value: 0.9214285714285714
|
|
|
|
key: train_recall
|
|
value: [0.98529412 1. 0.98550725 1. 0.98550725 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9956308610400683
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.875 1. 1. 0.86607143 0.86607143
|
|
0.92857143 1. 0.86607143 0.875 ]
|
|
|
|
mean value: 0.9276785714285715
|
|
|
|
key: train_roc_auc
|
|
value: [0.99264706 1. 0.99275362 1. 0.99275362 1.
|
|
1. 0.99275362 1. 1. ]
|
|
|
|
mean value: 0.9970907928388747
|
|
|
|
key: test_jcc
|
|
value: [1. 0.77777778 1. 1. 0.75 0.75
|
|
0.88888889 1. 0.77777778 0.75 ]
|
|
|
|
mean value: 0.8694444444444445
|
|
|
|
key: train_jcc
|
|
value: [0.98529412 1. 0.98550725 1. 0.98550725 1.
|
|
1. 0.98550725 1. 1. ]
|
|
|
|
mean value: 0.9941815856777494
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.85
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.10467362 0.06222582 0.04387116 0.04423785 0.0438931 0.05345726
|
|
0.04401588 0.04991961 0.06123614 0.01985145]
|
|
|
|
mean value: 0.052738189697265625
|
|
|
|
key: score_time
|
|
value: [0.0277462 0.02309895 0.01929665 0.02137995 0.02270675 0.01724195
|
|
0.02074289 0.02189159 0.02425289 0.01265621]
|
|
|
|
mean value: 0.02110140323638916
|
|
|
|
key: test_mcc
|
|
value: [0.57735027 0.37796447 0.26189246 0.49099025 0.66143783 0.46770717
|
|
0.75592895 0.46428571 0.49099025 0.34247476]
|
|
|
|
mean value: 0.48910221326969133
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.6875 0.6 0.73333333 0.8 0.66666667
|
|
0.86666667 0.73333333 0.73333333 0.66666667]
|
|
|
|
mean value: 0.72375
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.66666667 0.66666667 0.75 0.82352941 0.73684211
|
|
0.88888889 0.75 0.71428571 0.73684211]
|
|
|
|
mean value: 0.7533721558798958
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.71428571 0.54545455 0.66666667 0.7 0.58333333
|
|
0.8 0.75 0.83333333 0.63636364]
|
|
|
|
mean value: 0.6896103896103896
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.625 0.85714286 0.85714286 1. 1.
|
|
1. 0.75 0.625 0.875 ]
|
|
|
|
mean value: 0.8589285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.6875 0.61607143 0.74107143 0.8125 0.6875
|
|
0.85714286 0.73214286 0.74107143 0.65178571]
|
|
|
|
mean value: 0.7276785714285714
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.5 0.5 0.6 0.7 0.58333333
|
|
0.8 0.6 0.55555556 0.58333333]
|
|
|
|
mean value: 0.6088888888888889
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.08
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.27606034 0.25747943 0.26339436 0.26111817 0.21840525 0.25673342
|
|
0.26478767 0.25883365 0.26374054 0.26239896]
|
|
|
|
mean value: 0.2582951784133911
|
|
|
|
key: score_time
|
|
value: [0.00913334 0.00893497 0.00893378 0.00878906 0.00885391 0.00896621
|
|
0.00904179 0.00909328 0.00900531 0.00891733]
|
|
|
|
mean value: 0.008966898918151856
|
|
|
|
key: test_mcc
|
|
value: [1. 0.75 0.875 1. 0.73214286 0.87287156
|
|
1. 1. 0.73214286 0.875 ]
|
|
|
|
mean value: 0.8837157275229683
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.875 0.93333333 1. 0.86666667 0.93333333
|
|
1. 1. 0.86666667 0.93333333]
|
|
|
|
mean value: 0.9408333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.875 0.93333333 1. 0.85714286 0.92307692
|
|
1. 1. 0.875 0.93333333]
|
|
|
|
mean value: 0.9396886446886447
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 0.875 1. 0.85714286 1.
|
|
1. 1. 0.875 1. ]
|
|
|
|
mean value: 0.9482142857142857
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.875 1. 1. 0.85714286 0.85714286
|
|
1. 1. 0.875 0.875 ]
|
|
|
|
mean value: 0.9339285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.875 0.9375 1. 0.86607143 0.92857143
|
|
1. 1. 0.86607143 0.9375 ]
|
|
|
|
mean value: 0.9410714285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.77777778 0.875 1. 0.75 0.85714286
|
|
1. 1. 0.77777778 0.875 ]
|
|
|
|
mean value: 0.8912698412698412
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01719666 0.01709819 0.01698685 0.01688099 0.01700592 0.01693296
|
|
0.01785445 0.02687979 0.01735425 0.02863908]
|
|
|
|
mean value: 0.01928291320800781
|
|
|
|
key: score_time
|
|
value: [0.01173925 0.01162791 0.01165867 0.01255465 0.01246572 0.01261783
|
|
0.01174974 0.01269937 0.01248431 0.01166534]
|
|
|
|
mean value: 0.012126278877258301
|
|
|
|
key: test_mcc
|
|
value: [0.37796447 0.62994079 0.87287156 0.87287156 0.73214286 0.87287156
|
|
0.41931393 0.49099025 0.75592895 0.56407607]
|
|
|
|
mean value: 0.6588972009888676
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.8125 0.93333333 0.93333333 0.86666667 0.93333333
|
|
0.66666667 0.73333333 0.86666667 0.73333333]
|
|
|
|
mean value: 0.8104166666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.82352941 0.92307692 0.92307692 0.85714286 0.92307692
|
|
0.76190476 0.71428571 0.88888889 0.66666667]
|
|
|
|
mean value: 0.8208921797157092
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.57142857 0.77777778 1. 1. 0.85714286 1.
|
|
0.61538462 0.83333333 0.8 1. ]
|
|
|
|
mean value: 0.8455067155067155
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.875 0.85714286 0.85714286 0.85714286 0.85714286
|
|
1. 0.625 1. 0.5 ]
|
|
|
|
mean value: 0.8428571428571429
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.8125 0.92857143 0.92857143 0.86607143 0.92857143
|
|
0.64285714 0.74107143 0.85714286 0.75 ]
|
|
|
|
mean value: 0.8080357142857143
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.7 0.85714286 0.85714286 0.75 0.85714286
|
|
0.61538462 0.55555556 0.8 0.5 ]
|
|
|
|
mean value: 0.7063797313797313
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.99
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03938007 0.02838469 0.02835846 0.0434401 0.02969289 0.02813339
|
|
0.03353381 0.0308671 0.03408647 0.03346825]
|
|
|
|
mean value: 0.03293452262878418
|
|
|
|
key: score_time
|
|
value: [0.02064252 0.01147461 0.02169156 0.01526952 0.02015853 0.01412463
|
|
0.02033472 0.02013707 0.02364635 0.02059531]
|
|
|
|
mean value: 0.018807482719421387
|
|
|
|
key: test_mcc
|
|
value: [0.77459667 0.75 0.60714286 1. 0.66143783 0.73214286
|
|
0.75592895 1. 0.75592895 0.73214286]
|
|
|
|
mean value: 0.7769320960473112
|
|
|
|
key: train_mcc
|
|
value: [0.95598573 0.97100831 0.97122151 0.95629932 0.97080136 0.95629932
|
|
0.97080136 0.95630861 0.98550418 0.95630861]
|
|
|
|
mean value: 0.9650538317814117
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.875 0.8 1. 0.8 0.86666667
|
|
0.86666667 1. 0.86666667 0.86666667]
|
|
|
|
mean value: 0.8816666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.97794118 0.98529412 0.98540146 0.97810219 0.98540146 0.97810219
|
|
0.98540146 0.97810219 0.99270073 0.97810219]
|
|
|
|
mean value: 0.9824549162730786
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.875 0.8 1. 0.82352941 0.85714286
|
|
0.88888889 1. 0.88888889 0.875 ]
|
|
|
|
mean value: 0.8865592903828198
|
|
|
|
key: train_fscore
|
|
value: [0.97810219 0.98550725 0.98529412 0.97841727 0.98550725 0.97841727
|
|
0.98529412 0.97810219 0.99259259 0.97810219]
|
|
|
|
mean value: 0.98253364223575
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 0.75 1. 0.7 0.85714286
|
|
0.8 1. 0.8 0.875 ]
|
|
|
|
mean value: 0.8657142857142857
|
|
|
|
key: train_precision
|
|
value: [0.97101449 0.97142857 1. 0.97142857 0.98550725 0.97142857
|
|
0.98529412 0.97101449 1. 0.97101449]
|
|
|
|
mean value: 0.9798130556570455
|
|
|
|
key: test_recall
|
|
value: [0.75 0.875 0.85714286 1. 1. 0.85714286
|
|
1. 1. 1. 0.875 ]
|
|
|
|
mean value: 0.9214285714285714
|
|
|
|
key: train_recall
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./gid_orig.py:135: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_orig.py:138: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
smnc_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.98529412 1. 0.97101449 0.98550725 0.98550725 0.98550725
|
|
0.98529412 0.98529412 0.98529412 0.98529412]
|
|
|
|
mean value: 0.9854006820119352
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.875 0.80357143 1. 0.8125 0.86607143
|
|
0.85714286 1. 0.85714286 0.86607143]
|
|
|
|
mean value: 0.88125
|
|
|
|
key: train_roc_auc
|
|
value: [0.97794118 0.98529412 0.98550725 0.97804774 0.98540068 0.97804774
|
|
0.98540068 0.97815431 0.99264706 0.97815431]
|
|
|
|
mean value: 0.982459505541347
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.77777778 0.66666667 1. 0.7 0.75
|
|
0.8 1. 0.8 0.77777778]
|
|
|
|
mean value: 0.8022222222222222
|
|
|
|
key: train_jcc
|
|
value: [0.95714286 0.97142857 0.97101449 0.95774648 0.97142857 0.95774648
|
|
0.97101449 0.95714286 0.98529412 0.95714286]
|
|
|
|
mean value: 0.9657101775186498
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.25549579 0.22080564 0.20524096 0.19396758 0.19368577 0.19323683
|
|
0.19286656 0.19027901 0.19815254 0.30556083]
|
|
|
|
mean value: 0.21492915153503417
|
|
|
|
key: score_time
|
|
value: [0.01167083 0.02065277 0.01978779 0.01162076 0.0197885 0.02050185
|
|
0.02145648 0.02330804 0.02225208 0.02265215]
|
|
|
|
mean value: 0.019369125366210938
|
|
|
|
key: test_mcc
|
|
value: [0.77459667 0.62994079 0.60714286 1. 0.66143783 0.76376262
|
|
0.75592895 0.875 0.75592895 0.73214286]
|
|
|
|
mean value: 0.755588150750494
|
|
|
|
key: train_mcc
|
|
value: [0.95598573 1. 0.97122151 0.95629932 0.97080136 1.
|
|
1. 0.98550418 0.98550418 0.95630861]
|
|
|
|
mean value: 0.9781624891968844
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.8125 0.8 1. 0.8 0.86666667
|
|
0.86666667 0.93333333 0.86666667 0.86666667]
|
|
|
|
mean value: 0.86875
|
|
|
|
key: train_accuracy
|
|
value: [0.97794118 1. 0.98540146 0.97810219 0.98540146 1.
|
|
1. 0.99270073 0.99270073 0.97810219]
|
|
|
|
mean value: 0.9890349935594676
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.82352941 0.8 1. 0.82352941 0.875
|
|
0.88888889 0.93333333 0.88888889 0.875 ]
|
|
|
|
mean value: 0.8765312791783381
|
|
|
|
key: train_fscore
|
|
value: [0.97810219 1. 0.98529412 0.97841727 0.98550725 1.
|
|
1. 0.99259259 0.99259259 0.97810219]
|
|
|
|
mean value: 0.989060819495815
|
|
|
|
key: test_precision
|
|
value: [1. 0.77777778 0.75 1. 0.7 0.77777778
|
|
0.8 1. 0.8 0.875 ]
|
|
|
|
mean value: 0.8480555555555556
|
|
|
|
key: train_precision
|
|
value: [0.97101449 1. 1. 0.97142857 0.98550725 1.
|
|
1. 1. 1. 0.97101449]
|
|
|
|
mean value: 0.989896480331263
|
|
|
|
key: test_recall
|
|
value: [0.75 0.875 0.85714286 1. 1. 1.
|
|
1. 0.875 1. 0.875 ]
|
|
|
|
mean value: 0.9232142857142858
|
|
|
|
key: train_recall
|
|
value: [0.98529412 1. 0.97101449 0.98550725 0.98550725 1.
|
|
1. 0.98529412 0.98529412 0.98529412]
|
|
|
|
mean value: 0.9883205456095482
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.8125 0.80357143 1. 0.8125 0.875
|
|
0.85714286 0.9375 0.85714286 0.86607143]
|
|
|
|
mean value: 0.8696428571428572
|
|
|
|
key: train_roc_auc
|
|
value: [0.97794118 1. 0.98550725 0.97804774 0.98540068 1.
|
|
1. 0.99264706 0.99264706 0.97815431]
|
|
|
|
mean value: 0.98903452685422
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.7 0.66666667 1. 0.7 0.77777778
|
|
0.8 0.875 0.8 0.77777778]
|
|
|
|
mean value: 0.7847222222222222
|
|
|
|
key: train_jcc
|
|
value: [0.95714286 1. 0.97101449 0.95774648 0.97142857 1.
|
|
1. 0.98529412 0.98529412 0.95714286]
|
|
|
|
mean value: 0.9785063492635266
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.68
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02792597 0.02731872 0.0251379 0.0208137 0.02745724 0.02563
|
|
0.02640152 0.02301621 0.02505469 0.02661943]
|
|
|
|
mean value: 0.025537538528442382
|
|
|
|
key: score_time
|
|
value: [0.01137447 0.01140642 0.01132774 0.01132226 0.011338 0.01140189
|
|
0.01133633 0.01129055 0.01135898 0.0113225 ]
|
|
|
|
mean value: 0.01134791374206543
|
|
|
|
key: test_mcc
|
|
value: [0.77459667 0.5 0.37796447 1. 0.76376262 0.73214286
|
|
0.46428571 0.73214286 0.49099025 0.47245559]
|
|
|
|
mean value: 0.630834103094063
|
|
|
|
key: train_mcc
|
|
value: [0.89715584 0.94117647 0.94323594 0.91281179 0.92791659 0.8978896
|
|
0.89863497 0.94199209 0.91277477 0.8978896 ]
|
|
|
|
mean value: 0.9171477642860466
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.75 0.66666667 1. 0.86666667 0.86666667
|
|
0.73333333 0.86666667 0.73333333 0.73333333]
|
|
|
|
mean value: 0.8091666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.94852941 0.97058824 0.97080292 0.95620438 0.96350365 0.94890511
|
|
0.94890511 0.97080292 0.95620438 0.94890511]
|
|
|
|
mean value: 0.958335122370116
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.75 0.70588235 1. 0.875 0.85714286
|
|
0.75 0.875 0.71428571 0.77777778]
|
|
|
|
mean value: 0.8162231559290383
|
|
|
|
key: train_fscore
|
|
value: [0.94814815 0.97058824 0.97014925 0.95588235 0.96296296 0.94964029
|
|
0.94736842 0.97014925 0.95522388 0.94814815]
|
|
|
|
mean value: 0.9578260944376671
|
|
|
|
key: test_precision
|
|
value: [1. 0.75 0.6 1. 0.77777778 0.85714286
|
|
0.75 0.875 0.83333333 0.7 ]
|
|
|
|
mean value: 0.8143253968253968
|
|
|
|
key: train_precision
|
|
value: [0.95522388 0.97058824 1. 0.97014925 0.98484848 0.94285714
|
|
0.96923077 0.98484848 0.96969697 0.95522388]
|
|
|
|
mean value: 0.9702667101701342
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.85714286 1. 1. 0.85714286
|
|
0.75 0.875 0.625 0.875 ]
|
|
|
|
mean value: 0.8339285714285715
|
|
|
|
key: train_recall
|
|
value: [0.94117647 0.97058824 0.94202899 0.94202899 0.94202899 0.95652174
|
|
0.92647059 0.95588235 0.94117647 0.94117647]
|
|
|
|
mean value: 0.9459079283887468
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.75 0.67857143 1. 0.875 0.86607143
|
|
0.73214286 0.86607143 0.74107143 0.72321429]
|
|
|
|
mean value: 0.8107142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.94852941 0.97058824 0.97101449 0.95630861 0.96366155 0.9488491
|
|
0.94874254 0.9706948 0.95609548 0.9488491 ]
|
|
|
|
mean value: 0.9583333333333334
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.6 0.54545455 1. 0.77777778 0.75
|
|
0.6 0.77777778 0.55555556 0.63636364]
|
|
|
|
mean value: 0.6992929292929293
|
|
|
|
key: train_jcc
|
|
value: [0.90140845 0.94285714 0.94202899 0.91549296 0.92857143 0.90410959
|
|
0.9 0.94202899 0.91428571 0.90140845]
|
|
|
|
mean value: 0.9192191704924804
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.69153619 0.80936241 0.62225533 0.65131831 0.74939919 0.82583261
|
|
0.61750174 0.64893556 0.70422363 0.64521885]
|
|
|
|
mean value: 0.6965583801269531
|
|
|
|
key: score_time
|
|
value: [0.012501 0.01190329 0.01287556 0.01288652 0.01281619 0.01286817
|
|
0.01280427 0.01320028 0.01286268 0.01277542]
|
|
|
|
mean value: 0.012749338150024414
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.75 0.76376262 1. 0.66143783 0.73214286
|
|
0.64465837 0.87287156 0.46428571 0.87287156]
|
|
|
|
mean value: 0.7643947611817132
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.94160273]
|
|
|
|
mean value: 0.994160272804774
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.875 0.86666667 1. 0.8 0.86666667
|
|
0.8 0.93333333 0.73333333 0.93333333]
|
|
|
|
mean value: 0.8745833333333334
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.97080292]
|
|
|
|
mean value: 0.997080291970803
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.875 0.875 1. 0.82352941 0.85714286
|
|
0.84210526 0.94117647 0.75 0.94117647]
|
|
|
|
mean value: 0.8838463806575262
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.97058824]
|
|
|
|
mean value: 0.9970588235294118
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 0.77777778 1. 0.7 0.85714286
|
|
0.72727273 0.88888889 0.75 0.88888889]
|
|
|
|
mean value: 0.846497113997114
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.97058824]
|
|
|
|
mean value: 0.9970588235294118
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 1. 1. 0.85714286
|
|
1. 1. 0.75 1. ]
|
|
|
|
mean value: 0.9357142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.97058824]
|
|
|
|
mean value: 0.9970588235294118
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.875 0.875 1. 0.8125 0.86607143
|
|
0.78571429 0.92857143 0.73214286 0.92857143]
|
|
|
|
mean value: 0.8741071428571429
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.97080136]
|
|
|
|
mean value: 0.997080136402387
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.77777778 0.77777778 1. 0.7 0.75
|
|
0.72727273 0.88888889 0.6 0.88888889]
|
|
|
|
mean value: 0.7985606060606061
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.94285714]
|
|
|
|
mean value: 0.9942857142857143
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01223969 0.00993228 0.00880289 0.00858593 0.00832534 0.00831509
|
|
0.00848222 0.00867891 0.00828099 0.00835156]
|
|
|
|
mean value: 0.008999490737915039
|
|
|
|
key: score_time
|
|
value: [0.02127433 0.00887847 0.00874305 0.00918937 0.00837994 0.00838256
|
|
0.00842857 0.00844622 0.00842428 0.00837636]
|
|
|
|
mean value: 0.009852313995361328
|
|
|
|
key: test_mcc
|
|
value: [ 0.37796447 0.40451992 0.21821789 0.37796447 0.46770717 0.36689969
|
|
0.18898224 0.05455447 0.32732684 -0.25 ]
|
|
|
|
mean value: 0.25341371643494065
|
|
|
|
key: train_mcc
|
|
value: [0.413068 0.44611344 0.36921463 0.36921463 0.34968716 0.51090959
|
|
0.54031142 0.48879481 0.41938946 0.43843163]
|
|
|
|
mean value: 0.4345134780961774
|
|
|
|
key: test_accuracy
|
|
value: [0.625 0.6875 0.6 0.66666667 0.66666667 0.6
|
|
0.6 0.53333333 0.66666667 0.46666667]
|
|
|
|
mean value: 0.61125
|
|
|
|
key: train_accuracy
|
|
value: [0.68382353 0.69852941 0.66423358 0.66423358 0.65693431 0.75182482
|
|
0.75912409 0.73722628 0.68613139 0.70072993]
|
|
|
|
mean value: 0.7002790897380851
|
|
|
|
key: test_fscore
|
|
value: [0.72727273 0.73684211 0.625 0.70588235 0.73684211 0.7
|
|
0.66666667 0.58823529 0.70588235 0.63636364]
|
|
|
|
mean value: 0.6828987240829346
|
|
|
|
key: train_fscore
|
|
value: [0.74251497 0.75449102 0.72941176 0.72941176 0.72189349 0.77333333
|
|
0.78709677 0.76315789 0.74251497 0.74846626]
|
|
|
|
mean value: 0.7492292238552293
|
|
|
|
key: test_precision
|
|
value: [0.57142857 0.63636364 0.55555556 0.6 0.58333333 0.53846154
|
|
0.6 0.55555556 0.66666667 0.5 ]
|
|
|
|
mean value: 0.5807364857364857
|
|
|
|
key: train_precision
|
|
value: [0.62626263 0.63636364 0.61386139 0.61386139 0.61 0.71604938
|
|
0.70114943 0.69047619 0.62626263 0.64210526]
|
|
|
|
mean value: 0.6476391922803607
|
|
|
|
key: test_recall
|
|
value: [1. 0.875 0.71428571 0.85714286 1. 1.
|
|
0.75 0.625 0.75 0.875 ]
|
|
|
|
mean value: 0.8446428571428571
|
|
|
|
key: train_recall
|
|
value: [0.91176471 0.92647059 0.89855072 0.89855072 0.88405797 0.84057971
|
|
0.89705882 0.85294118 0.91176471 0.89705882]
|
|
|
|
mean value: 0.8918797953964195
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.6875 0.60714286 0.67857143 0.6875 0.625
|
|
0.58928571 0.52678571 0.66071429 0.4375 ]
|
|
|
|
mean value: 0.6125
|
|
|
|
key: train_roc_auc
|
|
value: [0.68382353 0.69852941 0.66251066 0.66251066 0.65526428 0.75117221
|
|
0.76012361 0.73806479 0.68776641 0.7021526 ]
|
|
|
|
mean value: 0.7001918158567775
|
|
|
|
key: test_jcc
|
|
value: [0.57142857 0.58333333 0.45454545 0.54545455 0.58333333 0.53846154
|
|
0.5 0.41666667 0.54545455 0.46666667]
|
|
|
|
mean value: 0.5205344655344655
|
|
|
|
key: train_jcc
|
|
value: [0.59047619 0.60576923 0.57407407 0.57407407 0.56481481 0.63043478
|
|
0.64893617 0.61702128 0.59047619 0.59803922]
|
|
|
|
mean value: 0.5994116019788055
|
|
|
|
MCC on Blind test: -0.0
|
|
|
|
Accuracy on Blind test: 0.32
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0087018 0.00853872 0.00848269 0.00848675 0.00855303 0.00848556
|
|
0.0085392 0.00849295 0.00860119 0.00851774]
|
|
|
|
mean value: 0.008539962768554687
|
|
|
|
key: score_time
|
|
value: [0.00843382 0.00837398 0.00834632 0.00840616 0.00839901 0.00846195
|
|
0.00838351 0.00841093 0.0083952 0.00841308]
|
|
|
|
mean value: 0.008402395248413085
|
|
|
|
key: test_mcc
|
|
value: [0.51639778 0.12598816 0.09449112 0.26189246 0.19642857 0.09449112
|
|
0.37796447 0.21821789 0.21821789 0.37796447]
|
|
|
|
mean value: 0.24820539348959375
|
|
|
|
key: train_mcc
|
|
value: [0.53033009 0.50349655 0.50959996 0.60584099 0.53517487 0.50469525
|
|
0.59240339 0.45151662 0.50525024 0.53294957]
|
|
|
|
mean value: 0.5271257516406771
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.5625 0.53333333 0.6 0.6 0.53333333
|
|
0.66666667 0.6 0.6 0.66666667]
|
|
|
|
mean value: 0.61125
|
|
|
|
key: train_accuracy
|
|
value: [0.76470588 0.75 0.75182482 0.80291971 0.76642336 0.75182482
|
|
0.79562044 0.72262774 0.75182482 0.76642336]
|
|
|
|
mean value: 0.7624194933447832
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.53333333 0.58823529 0.66666667 0.57142857 0.58823529
|
|
0.61538462 0.57142857 0.57142857 0.61538462]
|
|
|
|
mean value: 0.6099303311068016
|
|
|
|
key: train_fscore
|
|
value: [0.75757576 0.734375 0.734375 0.8057554 0.75757576 0.74626866
|
|
0.8 0.69354839 0.73846154 0.76119403]
|
|
|
|
mean value: 0.7529129522960445
|
|
|
|
key: test_precision
|
|
value: [0.7 0.57142857 0.5 0.54545455 0.57142857 0.5
|
|
0.8 0.66666667 0.66666667 0.8 ]
|
|
|
|
mean value: 0.6321645021645022
|
|
|
|
key: train_precision
|
|
value: [0.78125 0.78333333 0.79661017 0.8 0.79365079 0.76923077
|
|
0.77777778 0.76785714 0.77419355 0.77272727]
|
|
|
|
mean value: 0.7816630807455712
|
|
|
|
key: test_recall
|
|
value: [0.875 0.5 0.71428571 0.85714286 0.57142857 0.71428571
|
|
0.5 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.6232142857142857
|
|
|
|
key: train_recall
|
|
value: [0.73529412 0.69117647 0.68115942 0.8115942 0.72463768 0.72463768
|
|
0.82352941 0.63235294 0.70588235 0.75 ]
|
|
|
|
mean value: 0.7280264279624894
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.5625 0.54464286 0.61607143 0.59821429 0.54464286
|
|
0.67857143 0.60714286 0.60714286 0.67857143]
|
|
|
|
mean value: 0.61875
|
|
|
|
key: train_roc_auc
|
|
value: [0.76470588 0.75 0.75234442 0.80285592 0.76673061 0.75202472
|
|
0.79582268 0.72197357 0.7514919 0.76630435]
|
|
|
|
mean value: 0.7624254049445865
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.36363636 0.41666667 0.5 0.4 0.41666667
|
|
0.44444444 0.4 0.4 0.44444444]
|
|
|
|
mean value: 0.44222222222222224
|
|
|
|
key: train_jcc
|
|
value: [0.6097561 0.58024691 0.58024691 0.6746988 0.6097561 0.5952381
|
|
0.66666667 0.5308642 0.58536585 0.61445783]
|
|
|
|
mean value: 0.6047297461882631
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00832582 0.00817609 0.00818443 0.00810075 0.00809479 0.00813198
|
|
0.00812602 0.00824833 0.00817919 0.00815916]
|
|
|
|
mean value: 0.00817265510559082
|
|
|
|
key: score_time
|
|
value: [0.00930643 0.00926352 0.00937724 0.00929761 0.00933504 0.00922656
|
|
0.00925708 0.00928569 0.00933361 0.00929666]
|
|
|
|
mean value: 0.009297943115234375
|
|
|
|
key: test_mcc
|
|
value: [ 0.62994079 0.62994079 0.09449112 0.60714286 0.73214286 0.26189246
|
|
-0.07142857 0.53452248 0.46770717 0.05455447]
|
|
|
|
mean value: 0.3940906430845712
|
|
|
|
key: train_mcc
|
|
value: [0.54464795 0.57408838 0.62041773 0.57703846 0.49006025 0.5339313
|
|
0.62305217 0.59138421 0.62163943 0.6062745 ]
|
|
|
|
mean value: 0.5782534378327537
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.8125 0.53333333 0.8 0.86666667 0.6
|
|
0.46666667 0.73333333 0.66666667 0.53333333]
|
|
|
|
mean value: 0.6825
|
|
|
|
key: train_accuracy
|
|
value: [0.77205882 0.78676471 0.81021898 0.78832117 0.74452555 0.76642336
|
|
0.81021898 0.79562044 0.81021898 0.80291971]
|
|
|
|
mean value: 0.7887290682696436
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.8 0.58823529 0.8 0.85714286 0.66666667
|
|
0.5 0.8 0.54545455 0.58823529]
|
|
|
|
mean value: 0.6945734657499363
|
|
|
|
key: train_fscore
|
|
value: [0.77697842 0.79136691 0.8115942 0.79432624 0.75524476 0.76119403
|
|
0.81690141 0.79104478 0.81428571 0.79699248]
|
|
|
|
mean value: 0.790992893292864
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.85714286 0.5 0.75 0.85714286 0.54545455
|
|
0.5 0.66666667 1. 0.55555556]
|
|
|
|
mean value: 0.7089105339105339
|
|
|
|
key: train_precision
|
|
value: [0.76056338 0.77464789 0.8115942 0.77777778 0.72972973 0.78461538
|
|
0.78378378 0.8030303 0.79166667 0.81538462]
|
|
|
|
mean value: 0.7832793731492446
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.71428571 0.85714286 0.85714286 0.85714286
|
|
0.5 1. 0.375 0.625 ]
|
|
|
|
mean value: 0.7285714285714285
|
|
|
|
key: train_recall
|
|
value: [0.79411765 0.80882353 0.8115942 0.8115942 0.7826087 0.73913043
|
|
0.85294118 0.77941176 0.83823529 0.77941176]
|
|
|
|
mean value: 0.7997868712702473
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.8125 0.54464286 0.80357143 0.86607143 0.61607143
|
|
0.46428571 0.71428571 0.6875 0.52678571]
|
|
|
|
mean value: 0.6848214285714286
|
|
|
|
key: train_roc_auc
|
|
value: [0.77205882 0.78676471 0.81020887 0.78815004 0.74424552 0.76662404
|
|
0.81052856 0.79550298 0.81042199 0.80274936]
|
|
|
|
mean value: 0.7887254901960784
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.66666667 0.41666667 0.66666667 0.75 0.5
|
|
0.33333333 0.66666667 0.375 0.41666667]
|
|
|
|
mean value: 0.5458333333333333
|
|
|
|
key: train_jcc
|
|
value: [0.63529412 0.6547619 0.68292683 0.65882353 0.60674157 0.61445783
|
|
0.69047619 0.65432099 0.68674699 0.6625 ]
|
|
|
|
mean value: 0.6547049951530348
|
|
|
|
MCC on Blind test: -0.02
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01064086 0.00967836 0.00960207 0.0098033 0.00957322 0.0095706
|
|
0.00952244 0.00956464 0.00957322 0.00955892]
|
|
|
|
mean value: 0.009708762168884277
|
|
|
|
key: score_time
|
|
value: [0.00922561 0.00877929 0.00873637 0.00890636 0.00864887 0.00866437
|
|
0.00868011 0.00870419 0.00868416 0.00871134]
|
|
|
|
mean value: 0.008774065971374511
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.37796447 0.49099025 0.73214286 0.73214286 0.46428571
|
|
0.60714286 0.49099025 0.37796447 0.32732684]
|
|
|
|
mean value: 0.5230891361497406
|
|
|
|
key: train_mcc
|
|
value: [0.84051051 0.85628096 0.85440207 0.83947987 0.79880676 0.85440207
|
|
0.8251228 0.79855228 0.79688349 0.81433714]
|
|
|
|
mean value: 0.8278777956305471
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.6875 0.73333333 0.86666667 0.86666667 0.73333333
|
|
0.8 0.73333333 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7566666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.91911765 0.92647059 0.9270073 0.91970803 0.89781022 0.9270073
|
|
0.91240876 0.89781022 0.89781022 0.90510949]
|
|
|
|
mean value: 0.9130259768140833
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.66666667 0.75 0.85714286 0.85714286 0.71428571
|
|
0.8 0.71428571 0.61538462 0.70588235]
|
|
|
|
mean value: 0.7480790777849601
|
|
|
|
key: train_fscore
|
|
value: [0.91603053 0.92307692 0.92647059 0.92086331 0.89393939 0.92647059
|
|
0.91044776 0.89230769 0.89393939 0.89922481]
|
|
|
|
mean value: 0.9102770990833234
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.71428571 0.66666667 0.85714286 0.85714286 0.71428571
|
|
0.85714286 0.83333333 0.8 0.66666667]
|
|
|
|
mean value: 0.7823809523809524
|
|
|
|
key: train_precision
|
|
value: [0.95238095 0.96774194 0.94029851 0.91428571 0.93650794 0.94029851
|
|
0.92424242 0.93548387 0.921875 0.95081967]
|
|
|
|
mean value: 0.9383934520925161
|
|
|
|
key: test_recall
|
|
value: [0.75 0.625 0.85714286 0.85714286 0.85714286 0.71428571
|
|
0.75 0.625 0.5 0.75 ]
|
|
|
|
mean value: 0.7285714285714285
|
|
|
|
key: train_recall
|
|
value: [0.88235294 0.88235294 0.91304348 0.92753623 0.85507246 0.91304348
|
|
0.89705882 0.85294118 0.86764706 0.85294118]
|
|
|
|
mean value: 0.8843989769820971
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.6875 0.74107143 0.86607143 0.86607143 0.73214286
|
|
0.80357143 0.74107143 0.67857143 0.66071429]
|
|
|
|
mean value: 0.7589285714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.91911765 0.92647059 0.92710997 0.91965047 0.89812447 0.92710997
|
|
0.91229753 0.89748508 0.89759165 0.90473146]
|
|
|
|
mean value: 0.9129688832054561
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.5 0.6 0.75 0.75 0.55555556
|
|
0.66666667 0.55555556 0.44444444 0.54545455]
|
|
|
|
mean value: 0.6034343434343434
|
|
|
|
key: train_jcc
|
|
value: [0.84507042 0.85714286 0.8630137 0.85333333 0.80821918 0.8630137
|
|
0.83561644 0.80555556 0.80821918 0.81690141]
|
|
|
|
mean value: 0.8356085768798484
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.51071286 0.62532473 0.49337792 0.53811383 0.55977559 0.71114159
|
|
0.52939415 0.50543237 0.55521274 0.65180755]
|
|
|
|
mean value: 0.5680293321609498
|
|
|
|
key: score_time
|
|
value: [0.01190805 0.01196837 0.01200247 0.01195788 0.01197672 0.01212215
|
|
0.01198769 0.0119679 0.01194954 0.0119822 ]
|
|
|
|
mean value: 0.011982297897338868
|
|
|
|
key: test_mcc
|
|
value: [0.75 0.75 0.66143783 1. 0.76376262 0.73214286
|
|
0.33928571 0.75592895 0.49099025 0.75592895]
|
|
|
|
mean value: 0.6999477160088584
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.875 0.8 1. 0.86666667 0.86666667
|
|
0.66666667 0.86666667 0.73333333 0.86666667]
|
|
|
|
mean value: 0.8416666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.875 0.875 0.82352941 1. 0.875 0.85714286
|
|
0.66666667 0.88888889 0.71428571 0.88888889]
|
|
|
|
mean value: 0.8464402427637722
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.875 0.875 0.7 1. 0.77777778 0.85714286
|
|
0.71428571 0.8 0.83333333 0.8 ]
|
|
|
|
mean value: 0.8232539682539682
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 1. 1. 0.85714286
|
|
0.625 1. 0.625 1. ]
|
|
|
|
mean value: 0.8857142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.875 0.8125 1. 0.875 0.86607143
|
|
0.66964286 0.85714286 0.74107143 0.85714286]
|
|
|
|
mean value: 0.8428571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.77777778 0.77777778 0.7 1. 0.77777778 0.75
|
|
0.5 0.8 0.55555556 0.8 ]
|
|
|
|
mean value: 0.7438888888888889
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01587987 0.01330566 0.01045179 0.01051211 0.01048994 0.01024008
|
|
0.01018476 0.00993347 0.01029968 0.01046205]
|
|
|
|
mean value: 0.011175942420959473
|
|
|
|
key: score_time
|
|
value: [0.01157689 0.00889277 0.00871801 0.0086174 0.0084703 0.0083189
|
|
0.00842404 0.00833297 0.00833917 0.00839949]
|
|
|
|
mean value: 0.00880899429321289
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.8819171 1. 0.875 0.875 0.87287156
|
|
0.87287156 0.87287156 0.60714286 0.87287156]
|
|
|
|
mean value: 0.8612463308295129
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.9375 1. 0.93333333 0.93333333 0.93333333
|
|
0.93333333 0.93333333 0.8 0.93333333]
|
|
|
|
mean value: 0.9275
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.94117647 0.94117647 1. 0.93333333 0.93333333 0.92307692
|
|
0.94117647 0.94117647 0.8 0.94117647]
|
|
|
|
mean value: 0.9295625942684766
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.88888889 1. 0.875 0.875 1.
|
|
0.88888889 0.88888889 0.85714286 0.88888889]
|
|
|
|
mean value: 0.9051587301587302
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 0.85714286
|
|
1. 1. 0.75 1. ]
|
|
|
|
mean value: 0.9607142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.9375 1. 0.9375 0.9375 0.92857143
|
|
0.92857143 0.92857143 0.80357143 0.92857143]
|
|
|
|
mean value: 0.9267857142857143
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.88888889 0.88888889 1. 0.875 0.875 0.85714286
|
|
0.88888889 0.88888889 0.66666667 0.88888889]
|
|
|
|
mean value: 0.8718253968253968
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0862155 0.08660769 0.08695054 0.08672595 0.08652806 0.08790731
|
|
0.08697605 0.08698249 0.0864861 0.08691168]
|
|
|
|
mean value: 0.08682913780212402
|
|
|
|
key: score_time
|
|
value: [0.01669502 0.01704407 0.01698542 0.01700997 0.0178175 0.01703525
|
|
0.01697755 0.01743746 0.01710749 0.01699448]
|
|
|
|
mean value: 0.017110419273376466
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.62994079 0.66143783 0.87287156 0.73214286 0.60714286
|
|
0.66143783 1. 0.66143783 0.6000992 ]
|
|
|
|
mean value: 0.7308427848714014
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.8125 0.8 0.93333333 0.86666667 0.8
|
|
0.8 1. 0.8 0.8 ]
|
|
|
|
mean value: 0.855
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.82352941 0.82352941 0.92307692 0.85714286 0.8
|
|
0.76923077 1. 0.76923077 0.82352941]
|
|
|
|
mean value: 0.852260288730877
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.77777778 0.7 1. 0.85714286 0.75
|
|
1. 1. 1. 0.77777778]
|
|
|
|
mean value: 0.8862698412698413
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 0.85714286 0.85714286 0.85714286
|
|
0.625 1. 0.625 0.875 ]
|
|
|
|
mean value: 0.8446428571428571
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.8125 0.8125 0.92857143 0.86607143 0.80357143
|
|
0.8125 1. 0.8125 0.79464286]
|
|
|
|
mean value: 0.8580357142857142
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.7 0.7 0.85714286 0.75 0.66666667
|
|
0.625 1. 0.625 0.7 ]
|
|
|
|
mean value: 0.7498809523809523
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.04
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00886059 0.00908279 0.00891972 0.00858617 0.00867081 0.00878835
|
|
0.00867939 0.00867939 0.00872135 0.00870275]
|
|
|
|
mean value: 0.00876913070678711
|
|
|
|
key: score_time
|
|
value: [0.00852084 0.00866485 0.00882626 0.00853658 0.00854754 0.0085063
|
|
0.00849438 0.00842929 0.00840282 0.0084486 ]
|
|
|
|
mean value: 0.008537745475769043
|
|
|
|
key: test_mcc
|
|
value: [0.51639778 0.8819171 0.76376262 0.73214286 0.66143783 0.60714286
|
|
0.73214286 0.87287156 0.33928571 0.64465837]
|
|
|
|
mean value: 0.67517595446532
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.9375 0.86666667 0.86666667 0.8 0.8
|
|
0.86666667 0.93333333 0.66666667 0.8 ]
|
|
|
|
mean value: 0.82875
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.93333333 0.875 0.85714286 0.82352941 0.8
|
|
0.875 0.94117647 0.66666667 0.84210526]
|
|
|
|
mean value: 0.8391731780431471
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.7 1. 0.77777778 0.85714286 0.7 0.75
|
|
0.875 0.88888889 0.71428571 0.72727273]
|
|
|
|
mean value: 0.7990367965367965
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 0.85714286 1. 0.85714286
|
|
0.875 1. 0.625 1. ]
|
|
|
|
mean value: 0.8964285714285715
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.9375 0.875 0.86607143 0.8125 0.80357143
|
|
0.86607143 0.92857143 0.66964286 0.78571429]
|
|
|
|
mean value: 0.8294642857142858
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.875 0.77777778 0.75 0.7 0.66666667
|
|
0.77777778 0.88888889 0.5 0.72727273]
|
|
|
|
mean value: 0.7299747474747474
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.13003492 1.12355256 1.12175441 1.11954212 1.13803053 1.15066171
|
|
1.15327144 1.13439465 1.11341023 1.12649703]
|
|
|
|
mean value: 1.1311149597167969
|
|
|
|
key: score_time
|
|
value: [0.08830166 0.08753419 0.08891082 0.09308314 0.09482765 0.09497786
|
|
0.09345198 0.08721018 0.08751631 0.09360695]
|
|
|
|
mean value: 0.09094207286834717
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.62994079 0.76376262 0.87287156 1. 0.73214286
|
|
0.60714286 1. 0.66143783 0.64465837]
|
|
|
|
mean value: 0.7793873982079018
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.8125 0.86666667 0.93333333 1. 0.86666667
|
|
0.8 1. 0.8 0.8 ]
|
|
|
|
mean value: 0.8816666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.82352941 0.875 0.92307692 1. 0.85714286
|
|
0.8 1. 0.76923077 0.84210526]
|
|
|
|
mean value: 0.8823418557706484
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.77777778 0.77777778 1. 1. 0.85714286
|
|
0.85714286 1. 1. 0.72727273]
|
|
|
|
mean value: 0.8997113997113997
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 0.85714286 1. 0.85714286
|
|
0.75 1. 0.625 1. ]
|
|
|
|
mean value: 0.8839285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.8125 0.875 0.92857143 1. 0.86607143
|
|
0.80357143 1. 0.8125 0.78571429]
|
|
|
|
mean value: 0.8821428571428571
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.7 0.77777778 0.85714286 1. 0.75
|
|
0.66666667 1. 0.625 0.72727273]
|
|
|
|
mean value: 0.7978860028860029
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.90586853 0.93603158 0.88809967 0.95108223 0.85255527 0.91164207
|
|
0.91695261 0.85206008 0.92855096 0.90283537]
|
|
|
|
mean value: 0.9045678377151489
|
|
|
|
key: score_time
|
|
value: [0.24762607 0.22040629 0.204952 0.16551399 0.1194768 0.21311998
|
|
0.22083116 0.18471122 0.22333884 0.13864398]
|
|
|
|
mean value: 0.19386203289031984
|
|
|
|
key: test_mcc
|
|
value: [0.75 0.75 0.76376262 0.87287156 0.875 0.6000992
|
|
0.73214286 0.75592895 0.66143783 0.6000992 ]
|
|
|
|
mean value: 0.736134220399536
|
|
|
|
key: train_mcc
|
|
value: [0.98540068 1. 0.98550725 0.98550725 0.98550725 0.97122151
|
|
0.97120941 0.98550418 0.98550418 0.97120941]
|
|
|
|
mean value: 0.9826571107637088
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.875 0.86666667 0.93333333 0.93333333 0.8
|
|
0.86666667 0.86666667 0.8 0.8 ]
|
|
|
|
mean value: 0.8616666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.99264706 1. 0.99270073 0.99270073 0.99270073 0.98540146
|
|
0.98540146 0.99270073 0.99270073 0.98540146]
|
|
|
|
mean value: 0.991235508802061
|
|
|
|
key: test_fscore
|
|
value: [0.875 0.875 0.875 0.92307692 0.93333333 0.76923077
|
|
0.875 0.88888889 0.76923077 0.82352941]
|
|
|
|
mean value: 0.860729009552539
|
|
|
|
key: train_fscore
|
|
value: [0.99259259 1. 0.99270073 0.99270073 0.99270073 0.98529412
|
|
0.98507463 0.99259259 0.99259259 0.98507463]
|
|
|
|
mean value: 0.9911323338937202
|
|
|
|
key: test_precision
|
|
value: [0.875 0.875 0.77777778 1. 0.875 0.83333333
|
|
0.875 0.8 1. 0.77777778]
|
|
|
|
mean value: 0.8688888888888889
|
|
|
|
key: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 0.85714286 1. 0.71428571
|
|
0.875 1. 0.625 0.875 ]
|
|
|
|
mean value: 0.8696428571428572
|
|
|
|
key: train_recall
|
|
value: [0.98529412 1. 0.98550725 0.98550725 0.98550725 0.97101449
|
|
0.97058824 0.98529412 0.98529412 0.97058824]
|
|
|
|
mean value: 0.982459505541347
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.875 0.875 0.92857143 0.9375 0.79464286
|
|
0.86607143 0.85714286 0.8125 0.79464286]
|
|
|
|
mean value: 0.8616071428571429
|
|
|
|
key: train_roc_auc
|
|
value: [0.99264706 1. 0.99275362 0.99275362 0.99275362 0.98550725
|
|
0.98529412 0.99264706 0.99264706 0.98529412]
|
|
|
|
mean value: 0.9912297527706735
|
|
|
|
key: test_jcc
|
|
value: [0.77777778 0.77777778 0.77777778 0.85714286 0.875 0.625
|
|
0.77777778 0.8 0.625 0.7 ]
|
|
|
|
mean value: 0.7593253968253968
|
|
|
|
key: train_jcc
|
|
value: [0.98529412 1. 0.98550725 0.98550725 0.98550725 0.97101449
|
|
0.97058824 0.98529412 0.98529412 0.97058824]
|
|
|
|
mean value: 0.982459505541347
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01912498 0.00874758 0.00960207 0.00982785 0.00947642 0.00888705
|
|
0.00887609 0.00895357 0.00890136 0.00877714]
|
|
|
|
mean value: 0.010117411613464355
|
|
|
|
key: score_time
|
|
value: [0.00888324 0.00855899 0.00940561 0.00900817 0.00922036 0.00869274
|
|
0.00869942 0.00855994 0.00850224 0.00864863]
|
|
|
|
mean value: 0.008817934989929199
|
|
|
|
key: test_mcc
|
|
value: [0.51639778 0.12598816 0.09449112 0.26189246 0.19642857 0.09449112
|
|
0.37796447 0.21821789 0.21821789 0.37796447]
|
|
|
|
mean value: 0.24820539348959375
|
|
|
|
key: train_mcc
|
|
value: [0.53033009 0.50349655 0.50959996 0.60584099 0.53517487 0.50469525
|
|
0.59240339 0.45151662 0.50525024 0.53294957]
|
|
|
|
mean value: 0.5271257516406771
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.5625 0.53333333 0.6 0.6 0.53333333
|
|
0.66666667 0.6 0.6 0.66666667]
|
|
|
|
mean value: 0.61125
|
|
|
|
key: train_accuracy
|
|
value: [0.76470588 0.75 0.75182482 0.80291971 0.76642336 0.75182482
|
|
0.79562044 0.72262774 0.75182482 0.76642336]
|
|
|
|
mean value: 0.7624194933447832
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.53333333 0.58823529 0.66666667 0.57142857 0.58823529
|
|
0.61538462 0.57142857 0.57142857 0.61538462]
|
|
|
|
mean value: 0.6099303311068016
|
|
|
|
key: train_fscore
|
|
value: [0.75757576 0.734375 0.734375 0.8057554 0.75757576 0.74626866
|
|
0.8 0.69354839 0.73846154 0.76119403]
|
|
|
|
mean value: 0.7529129522960445
|
|
|
|
key: test_precision
|
|
value: [0.7 0.57142857 0.5 0.54545455 0.57142857 0.5
|
|
0.8 0.66666667 0.66666667 0.8 ]
|
|
|
|
mean value: 0.6321645021645022
|
|
|
|
key: train_precision
|
|
value: [0.78125 0.78333333 0.79661017 0.8 0.79365079 0.76923077
|
|
0.77777778 0.76785714 0.77419355 0.77272727]
|
|
|
|
mean value: 0.7816630807455712
|
|
|
|
key: test_recall
|
|
value: [0.875 0.5 0.71428571 0.85714286 0.57142857 0.71428571
|
|
0.5 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.6232142857142857
|
|
|
|
key: train_recall
|
|
value: [0.73529412 0.69117647 0.68115942 0.8115942 0.72463768 0.72463768
|
|
0.82352941 0.63235294 0.70588235 0.75 ]
|
|
|
|
mean value: 0.7280264279624894
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.5625 0.54464286 0.61607143 0.59821429 0.54464286
|
|
0.67857143 0.60714286 0.60714286 0.67857143]
|
|
|
|
mean value: 0.61875
|
|
|
|
key: train_roc_auc
|
|
value: [0.76470588 0.75 0.75234442 0.80285592 0.76673061 0.75202472
|
|
0.79582268 0.72197357 0.7514919 0.76630435]
|
|
|
|
mean value: 0.7624254049445865
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.36363636 0.41666667 0.5 0.4 0.41666667
|
|
0.44444444 0.4 0.4 0.44444444]
|
|
|
|
mean value: 0.44222222222222224
|
|
|
|
key: train_jcc
|
|
value: [0.6097561 0.58024691 0.58024691 0.6746988 0.6097561 0.5952381
|
|
0.66666667 0.5308642 0.58536585 0.61445783]
|
|
|
|
mean value: 0.6047297461882631
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.04883242 0.04633856 0.04238772 0.05236554 0.04883766 0.04279828
|
|
0.04210663 0.04309344 0.04330492 0.22795987]
|
|
|
|
mean value: 0.06380250453948974
|
|
|
|
key: score_time
|
|
value: [0.01022696 0.01112413 0.01113605 0.01113963 0.01104569 0.0110836
|
|
0.01104999 0.01103091 0.01105928 0.01130748]
|
|
|
|
mean value: 0.011020374298095704
|
|
|
|
key: test_mcc
|
|
value: [1. 0.75 1. 1. 0.875 1.
|
|
0.87287156 1. 0.87287156 1. ]
|
|
|
|
mean value: 0.9370743121887939
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.875 1. 1. 0.93333333 1.
|
|
0.93333333 1. 0.93333333 1. ]
|
|
|
|
mean value: 0.9675
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.875 1. 1. 0.93333333 1.
|
|
0.94117647 1. 0.94117647 1. ]
|
|
|
|
mean value: 0.9690686274509804
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 1. 1. 0.875 1.
|
|
0.88888889 1. 0.88888889 1. ]
|
|
|
|
mean value: 0.9527777777777777
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.875 1. 1. 1. 1. 1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9875
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.875 1. 1. 0.9375 1.
|
|
0.92857143 1. 0.92857143 1. ]
|
|
|
|
mean value: 0.9669642857142857
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.77777778 1. 1. 0.875 1.
|
|
0.88888889 1. 0.88888889 1. ]
|
|
|
|
mean value: 0.9430555555555555
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.0287745 0.04505372 0.0480001 0.02139306 0.0232172 0.04378772
|
|
0.02212381 0.02828956 0.0516181 0.04729009]
|
|
|
|
mean value: 0.03595478534698486
|
|
|
|
key: score_time
|
|
value: [0.02199507 0.02018237 0.02165437 0.01165485 0.02183247 0.01162839
|
|
0.01166296 0.02029037 0.02277994 0.02115297]
|
|
|
|
mean value: 0.01848337650299072
|
|
|
|
key: test_mcc
|
|
value: [0.37796447 0.62994079 0.56407607 0.56407607 0.56407607 0.60714286
|
|
0.47245559 0.64465837 0.33928571 0.33928571]
|
|
|
|
mean value: 0.5102961734007362
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.8125 0.73333333 0.73333333 0.73333333 0.8
|
|
0.73333333 0.8 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7366666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.82352941 0.77777778 0.77777778 0.77777778 0.8
|
|
0.77777778 0.84210526 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7615961472308221
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.77777778 0.63636364 0.63636364 0.63636364 0.75
|
|
0.7 0.72727273 0.71428571 0.71428571]
|
|
|
|
mean value: 0.695937950937951
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.875 1. 1. 1. 0.85714286
|
|
0.875 1. 0.625 0.625 ]
|
|
|
|
mean value: 0.8607142857142858
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.8125 0.75 0.75 0.75 0.80357143
|
|
0.72321429 0.78571429 0.66964286 0.66964286]
|
|
|
|
mean value: 0.7401785714285715
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.7 0.63636364 0.63636364 0.63636364 0.66666667
|
|
0.63636364 0.72727273 0.5 0.5 ]
|
|
|
|
mean value: 0.6184848484848484
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02826953 0.00896072 0.00866175 0.00845218 0.00843525 0.00844526
|
|
0.00838947 0.00843453 0.00848961 0.00843048]
|
|
|
|
mean value: 0.010496878623962402
|
|
|
|
key: score_time
|
|
value: [0.00984025 0.00874448 0.00840664 0.00832248 0.00841665 0.00833011
|
|
0.00835133 0.00834513 0.00835109 0.00833511]
|
|
|
|
mean value: 0.008544325828552246
|
|
|
|
key: test_mcc
|
|
value: [ 0.51639778 0.51639778 0.37796447 0.20044593 0.32732684 -0.04029115
|
|
0.21821789 0.05455447 0.07142857 0.46428571]
|
|
|
|
mean value: 0.27067282990941854
|
|
|
|
key: train_mcc
|
|
value: [0.33856494 0.41194292 0.4690744 0.28501323 0.34357013 0.32871457
|
|
0.35764563 0.32871457 0.34299724 0.34359809]
|
|
|
|
mean value: 0.3549835701506521
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.75 0.66666667 0.6 0.66666667 0.46666667
|
|
0.6 0.53333333 0.53333333 0.73333333]
|
|
|
|
mean value: 0.63
|
|
|
|
key: train_accuracy
|
|
value: [0.66911765 0.70588235 0.72992701 0.64233577 0.67153285 0.66423358
|
|
0.67883212 0.66423358 0.67153285 0.67153285]
|
|
|
|
mean value: 0.6769160583941606
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.71428571 0.70588235 0.4 0.61538462 0.55555556
|
|
0.57142857 0.58823529 0.53333333 0.75 ]
|
|
|
|
mean value: 0.6211883214824391
|
|
|
|
key: train_fscore
|
|
value: [0.66165414 0.70149254 0.704 0.65734266 0.66666667 0.66176471
|
|
0.67164179 0.66666667 0.66666667 0.65648855]
|
|
|
|
mean value: 0.6714384376539886
|
|
|
|
key: test_precision
|
|
value: [0.7 0.83333333 0.6 0.66666667 0.66666667 0.45454545
|
|
0.66666667 0.55555556 0.57142857 0.75 ]
|
|
|
|
mean value: 0.6464862914862914
|
|
|
|
key: train_precision
|
|
value: [0.67692308 0.71212121 0.78571429 0.63513514 0.68181818 0.67164179
|
|
0.68181818 0.65714286 0.67164179 0.68253968]
|
|
|
|
mean value: 0.6856496195302165
|
|
|
|
key: test_recall
|
|
value: [0.875 0.625 0.85714286 0.28571429 0.57142857 0.71428571
|
|
0.5 0.625 0.5 0.75 ]
|
|
|
|
mean value: 0.6303571428571428
|
|
|
|
key: train_recall
|
|
value: [0.64705882 0.69117647 0.63768116 0.68115942 0.65217391 0.65217391
|
|
0.66176471 0.67647059 0.66176471 0.63235294]
|
|
|
|
mean value: 0.6593776641091219
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.75 0.67857143 0.58035714 0.66071429 0.48214286
|
|
0.60714286 0.52678571 0.53571429 0.73214286]
|
|
|
|
mean value: 0.6303571428571428
|
|
|
|
key: train_roc_auc
|
|
value: [0.66911765 0.70588235 0.73060529 0.6420503 0.67167519 0.66432225
|
|
0.67870844 0.66432225 0.67146206 0.67124893]
|
|
|
|
mean value: 0.6769394714407502
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.55555556 0.54545455 0.25 0.44444444 0.38461538
|
|
0.4 0.41666667 0.36363636 0.6 ]
|
|
|
|
mean value: 0.4596736596736597
|
|
|
|
key: train_jcc
|
|
value: [0.49438202 0.54022989 0.54320988 0.48958333 0.5 0.49450549
|
|
0.50561798 0.5 0.5 0.48863636]
|
|
|
|
mean value: 0.5056164953075872
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01021981 0.01472306 0.01346636 0.01583171 0.01679564 0.01497579
|
|
0.01578879 0.01588607 0.01462793 0.01430941]
|
|
|
|
mean value: 0.014662456512451173
|
|
|
|
key: score_time
|
|
value: [0.00839567 0.01128626 0.01132059 0.01161289 0.01131344 0.01138806
|
|
0.01125145 0.01127243 0.01126599 0.01127338]
|
|
|
|
mean value: 0.01103801727294922
|
|
|
|
key: test_mcc
|
|
value: [0.77459667 0.75 0.6000992 1. 0.76376262 0.73214286
|
|
0.33928571 0.87287156 0.66143783 0.73214286]
|
|
|
|
mean value: 0.7226339300497981
|
|
|
|
key: train_mcc
|
|
value: [0.94158382 0.98540068 0.91597649 0.95713391 0.97122151 0.95629932
|
|
0.94199209 0.97080136 0.95710706 0.92787101]
|
|
|
|
mean value: 0.9525387247176611
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.875 0.8 1. 0.86666667 0.86666667
|
|
0.66666667 0.93333333 0.8 0.86666667]
|
|
|
|
mean value: 0.855
|
|
|
|
key: train_accuracy
|
|
value: [0.97058824 0.99264706 0.95620438 0.97810219 0.98540146 0.97810219
|
|
0.97080292 0.98540146 0.97810219 0.96350365]
|
|
|
|
mean value: 0.9758855732073852
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.875 0.76923077 1. 0.875 0.85714286
|
|
0.66666667 0.94117647 0.76923077 0.875 ]
|
|
|
|
mean value: 0.8485590390002155
|
|
|
|
key: train_fscore
|
|
value: [0.97014925 0.99270073 0.95454545 0.97777778 0.98529412 0.97841727
|
|
0.97014925 0.98529412 0.97744361 0.96240602]
|
|
|
|
mean value: 0.9754177595254244
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 0.83333333 1. 0.77777778 0.85714286
|
|
0.71428571 0.88888889 1. 0.875 ]
|
|
|
|
mean value: 0.8821428571428571
|
|
|
|
key: train_precision
|
|
value: [0.98484848 0.98550725 1. 1. 1. 0.97142857
|
|
0.98484848 0.98529412 1. 0.98461538]
|
|
|
|
mean value: 0.9896542289764796
|
|
|
|
key: test_recall
|
|
value: [0.75 0.875 0.71428571 1. 1. 0.85714286
|
|
0.625 1. 0.625 0.875 ]
|
|
|
|
mean value: 0.8321428571428572
|
|
|
|
key: train_recall
|
|
value: [0.95588235 1. 0.91304348 0.95652174 0.97101449 0.98550725
|
|
0.95588235 0.98529412 0.95588235 0.94117647]
|
|
|
|
mean value: 0.9620204603580563
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.875 0.79464286 1. 0.875 0.86607143
|
|
0.66964286 0.92857143 0.8125 0.86607143]
|
|
|
|
mean value: 0.85625
|
|
|
|
key: train_roc_auc
|
|
value: [0.97058824 0.99264706 0.95652174 0.97826087 0.98550725 0.97804774
|
|
0.9706948 0.98540068 0.97794118 0.96334186]
|
|
|
|
mean value: 0.9758951406649616
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.77777778 0.625 1. 0.77777778 0.75
|
|
0.5 0.88888889 0.625 0.77777778]
|
|
|
|
mean value: 0.7472222222222222
|
|
|
|
key: train_jcc
|
|
value: [0.94202899 0.98550725 0.91304348 0.95652174 0.97101449 0.95774648
|
|
0.94202899 0.97101449 0.95588235 0.92753623]
|
|
|
|
mean value: 0.9522324483988329
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.46
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01297069 0.01287246 0.0131824 0.01306939 0.01283884 0.0133245
|
|
0.01278806 0.01465464 0.01478004 0.035779 ]
|
|
|
|
mean value: 0.015626001358032226
|
|
|
|
key: score_time
|
|
value: [0.01125312 0.01122284 0.01120329 0.01118088 0.01124144 0.01121807
|
|
0.01123738 0.01154613 0.01181602 0.0119319 ]
|
|
|
|
mean value: 0.011385107040405273
|
|
|
|
key: test_mcc
|
|
value: [0.57735027 0.62994079 0.875 0.875 0.66143783 0.6000992
|
|
0.49099025 0.28571429 0.46428571 0.73214286]
|
|
|
|
mean value: 0.6191961193627304
|
|
|
|
key: train_mcc
|
|
value: [0.61134064 0.91533482 1. 0.87086187 0.94199209 0.62625207
|
|
0.84660737 0.4690195 0.73427935 0.92709446]
|
|
|
|
mean value: 0.7942782177369494
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.8125 0.93333333 0.93333333 0.8 0.8
|
|
0.73333333 0.6 0.73333333 0.86666667]
|
|
|
|
mean value: 0.79625
|
|
|
|
key: train_accuracy
|
|
value: [0.77205882 0.95588235 1. 0.93430657 0.97080292 0.7810219
|
|
0.91970803 0.67883212 0.8540146 0.96350365]
|
|
|
|
mean value: 0.8830130957492486
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.82352941 0.93333333 0.93333333 0.82352941 0.76923077
|
|
0.71428571 0.72727273 0.75 0.875 ]
|
|
|
|
mean value: 0.814951470098529
|
|
|
|
key: train_fscore
|
|
value: [0.81437126 0.95774648 1. 0.93706294 0.97142857 0.72222222
|
|
0.91338583 0.75555556 0.87012987 0.96296296]
|
|
|
|
mean value: 0.8904865682492042
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.77777778 0.875 0.875 0.7 0.83333333
|
|
0.83333333 0.57142857 0.75 0.875 ]
|
|
|
|
mean value: 0.7757539682539683
|
|
|
|
key: train_precision
|
|
value: [0.68686869 0.91891892 1. 0.90540541 0.95774648 1.
|
|
0.98305085 0.60714286 0.77906977 0.97014925]
|
|
|
|
mean value: 0.8808352215839939
|
|
|
|
key: test_recall
|
|
value: [1. 0.875 1. 1. 1. 0.71428571
|
|
0.625 1. 0.75 0.875 ]
|
|
|
|
mean value: 0.8839285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.97101449 0.98550725 0.56521739
|
|
0.85294118 1. 0.98529412 0.95588235]
|
|
|
|
mean value: 0.9315856777493606
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.8125 0.9375 0.9375 0.8125 0.79464286
|
|
0.74107143 0.57142857 0.73214286 0.86607143]
|
|
|
|
mean value: 0.7955357142857142
|
|
|
|
key: train_roc_auc
|
|
value: [0.77205882 0.95588235 1. 0.93403666 0.9706948 0.7826087
|
|
0.91922421 0.68115942 0.8549659 0.96344842]
|
|
|
|
mean value: 0.8834079283887468
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.7 0.875 0.875 0.7 0.625
|
|
0.55555556 0.57142857 0.6 0.77777778]
|
|
|
|
mean value: 0.6946428571428571
|
|
|
|
key: train_jcc
|
|
value: [0.68686869 0.91891892 1. 0.88157895 0.94444444 0.56521739
|
|
0.84057971 0.60714286 0.77011494 0.92857143]
|
|
|
|
mean value: 0.8143437327292767
|
|
|
|
MCC on Blind test: 0.08
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.10859799 0.09386134 0.09250259 0.09275675 0.09327698 0.09390163
|
|
0.0930047 0.09283376 0.09291363 0.09362578]
|
|
|
|
mean value: 0.0947275161743164
|
|
|
|
key: score_time
|
|
value: [0.01455379 0.01461649 0.01445794 0.01458097 0.01457262 0.01485562
|
|
0.01465678 0.01462412 0.01461625 0.01483679]
|
|
|
|
mean value: 0.014637136459350586
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.75 0.875 1. 0.875 1.
|
|
0.75592895 1. 0.60714286 0.87287156]
|
|
|
|
mean value: 0.8617860467793478
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.875 0.93333333 1. 0.93333333 1.
|
|
0.86666667 1. 0.8 0.93333333]
|
|
|
|
mean value: 0.9279166666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.875 0.93333333 1. 0.93333333 1.
|
|
0.88888889 1. 0.8 0.94117647]
|
|
|
|
mean value: 0.9305065359477124
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 0.875 1. 0.875 1.
|
|
0.8 1. 0.85714286 0.88888889]
|
|
|
|
mean value: 0.9171031746031746
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 1. 1. 1. 1. 1. 0.75 1. ]
|
|
|
|
mean value: 0.95
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.875 0.9375 1. 0.9375 1.
|
|
0.85714286 1. 0.80357143 0.92857143]
|
|
|
|
mean value: 0.9276785714285715
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.77777778 0.875 1. 0.875 1.
|
|
0.8 1. 0.66666667 0.88888889]
|
|
|
|
mean value: 0.8758333333333334
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.09
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04233217 0.0453558 0.03615093 0.0350399 0.05168247 0.04953241
|
|
0.03539181 0.02963448 0.02912092 0.03293037]
|
|
|
|
mean value: 0.038717126846313475
|
|
|
|
key: score_time
|
|
value: [0.02476406 0.01846552 0.02050114 0.03115773 0.03751206 0.03031969
|
|
0.01657867 0.01927161 0.01637602 0.02038503]
|
|
|
|
mean value: 0.023533153533935546
|
|
|
|
key: test_mcc
|
|
value: [1. 0.75 0.875 1. 0.73214286 0.73214286
|
|
0.87287156 1. 0.60714286 1. ]
|
|
|
|
mean value: 0.856930013237254
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.98550725 1. 0.98550725 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9971014492753624
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.875 0.93333333 1. 0.86666667 0.86666667
|
|
0.93333333 1. 0.8 1. ]
|
|
|
|
mean value: 0.9275
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.99270073 1. 0.99270073 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9985401459854014
|
|
|
|
key: test_fscore
|
|
value: [1. 0.875 0.93333333 1. 0.85714286 0.85714286
|
|
0.94117647 1. 0.8 1. ]
|
|
|
|
mean value: 0.9263795518207283
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.99270073 1. 0.99270073 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9985401459854015
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 0.875 1. 0.85714286 0.85714286
|
|
0.88888889 1. 0.85714286 1. ]
|
|
|
|
mean value: 0.921031746031746
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.875 1. 1. 0.85714286 0.85714286
|
|
1. 1. 0.75 1. ]
|
|
|
|
mean value: 0.9339285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 0.98550725 1. 0.98550725 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9971014492753624
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.875 0.9375 1. 0.86607143 0.86607143
|
|
0.92857143 1. 0.80357143 1. ]
|
|
|
|
mean value: 0.9276785714285715
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.99275362 1. 0.99275362 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9985507246376811
|
|
|
|
key: test_jcc
|
|
value: [1. 0.77777778 0.875 1. 0.75 0.75
|
|
0.88888889 1. 0.66666667 1. ]
|
|
|
|
mean value: 0.8708333333333333
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.98550725 1. 0.98550725 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9971014492753624
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.04632425 0.04589558 0.05224323 0.04858661 0.0493474 0.04912758
|
|
0.04896307 0.06054115 0.0467627 0.04628658]
|
|
|
|
mean value: 0.04940781593322754
|
|
|
|
key: score_time
|
|
value: [0.0182476 0.022089 0.02181435 0.02127481 0.02358222 0.02248502
|
|
0.02054977 0.02320457 0.01664424 0.02343416]
|
|
|
|
mean value: 0.021332573890686036
|
|
|
|
key: test_mcc
|
|
value: [0.40451992 0.62994079 0.56407607 0.875 0.76376262 0.49099025
|
|
0.33928571 0.87287156 0.66143783 0.6000992 ]
|
|
|
|
mean value: 0.620198395064619
|
|
|
|
key: train_mcc
|
|
value: [0.98540068 0.98540068 0.98550418 0.98550418 0.98550418 0.98550418
|
|
0.98550725 0.98550725 0.98550725 1. ]
|
|
|
|
mean value: 0.9869339808783808
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.8125 0.73333333 0.93333333 0.86666667 0.73333333
|
|
0.66666667 0.93333333 0.8 0.8 ]
|
|
|
|
mean value: 0.7966666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.99264706 0.99264706 0.99270073 0.99270073 0.99270073 0.99270073
|
|
0.99270073 0.99270073 0.99270073 1. ]
|
|
|
|
mean value: 0.993419922713611
|
|
|
|
key: test_fscore
|
|
value: [0.73684211 0.82352941 0.77777778 0.93333333 0.875 0.75
|
|
0.66666667 0.94117647 0.76923077 0.82352941]
|
|
|
|
mean value: 0.8097085946389352
|
|
|
|
key: train_fscore
|
|
value: [0.99270073 0.99270073 0.99280576 0.99280576 0.99280576 0.99280576
|
|
0.99270073 0.99270073 0.99270073 1. ]
|
|
|
|
mean value: 0.9934726671217771
|
|
|
|
key: test_precision
|
|
value: [0.63636364 0.77777778 0.63636364 0.875 0.77777778 0.66666667
|
|
0.71428571 0.88888889 1. 0.77777778]
|
|
|
|
mean value: 0.7750901875901876
|
|
|
|
key: train_precision
|
|
value: [0.98550725 0.98550725 0.98571429 0.98571429 0.98571429 0.98571429
|
|
0.98550725 0.98550725 0.98550725 1. ]
|
|
|
|
mean value: 0.9870393374741201
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 1. 1. 0.85714286
|
|
0.625 1. 0.625 0.875 ]
|
|
|
|
mean value: 0.8732142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.8125 0.75 0.9375 0.875 0.74107143
|
|
0.66964286 0.92857143 0.8125 0.79464286]
|
|
|
|
mean value: 0.8008928571428572
|
|
|
|
key: train_roc_auc
|
|
value: [0.99264706 0.99264706 0.99264706 0.99264706 0.99264706 0.99264706
|
|
0.99275362 0.99275362 0.99275362 1. ]
|
|
|
|
mean value: 0.9934143222506394
|
|
|
|
key: test_jcc
|
|
value: [0.58333333 0.7 0.63636364 0.875 0.77777778 0.6
|
|
0.5 0.88888889 0.625 0.7 ]
|
|
|
|
mean value: 0.6886363636363636
|
|
|
|
key: train_jcc
|
|
value: [0.98550725 0.98550725 0.98571429 0.98571429 0.98571429 0.98571429
|
|
0.98550725 0.98550725 0.98550725 1. ]
|
|
|
|
mean value: 0.9870393374741201
|
|
|
|
MCC on Blind test: -0.06
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.24086428 0.1915431 0.19633222 0.23168492 0.23006678 0.22822237
|
|
0.22251344 0.19692373 0.23212194 0.23451424]
|
|
|
|
mean value: 0.2204787015914917
|
|
|
|
key: score_time
|
|
value: [0.00912738 0.00884342 0.00901175 0.00892282 0.00904131 0.00898123
|
|
0.00881338 0.00891829 0.00899601 0.00898814]
|
|
|
|
mean value: 0.00896437168121338
|
|
|
|
key: test_mcc
|
|
value: [1. 0.8819171 1. 1. 0.875 0.87287156
|
|
0.87287156 0.87287156 0.60714286 0.87287156]
|
|
|
|
mean value: 0.8855546204606932
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [1. 0.9375 1. 1. 0.93333333 0.93333333
|
|
0.93333333 0.93333333 0.8 0.93333333]
|
|
|
|
mean value: 0.9404166666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [1. 0.94117647 1. 1. 0.93333333 0.92307692
|
|
0.94117647 0.94117647 0.8 0.94117647]
|
|
|
|
mean value: 0.9421116138763197
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 0.88888889 1. 1. 0.875 1.
|
|
0.88888889 0.88888889 0.85714286 0.88888889]
|
|
|
|
mean value: 0.9287698412698413
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 0.85714286
|
|
1. 1. 0.75 1. ]
|
|
|
|
mean value: 0.9607142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [1. 0.9375 1. 1. 0.9375 0.92857143
|
|
0.92857143 0.92857143 0.80357143 0.92857143]
|
|
|
|
mean value: 0.9392857142857143
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [1. 0.88888889 1. 1. 0.875 0.85714286
|
|
0.88888889 0.88888889 0.66666667 0.88888889]
|
|
|
|
mean value: 0.8954365079365079
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01531243 0.01658034 0.01707101 0.01658964 0.01652956 0.01666713
|
|
0.0167017 0.0350194 0.01742578 0.03410149]
|
|
|
|
mean value: 0.020199847221374512
|
|
|
|
key: score_time
|
|
value: [0.01163173 0.01171184 0.0116539 0.01162744 0.01165056 0.01259828
|
|
0.01246953 0.01177812 0.01256156 0.01198077]
|
|
|
|
mean value: 0.011966371536254882
|
|
|
|
key: test_mcc
|
|
value: [0.77459667 0.8819171 0.75592895 0.75592895 0.75592895 0.75592895
|
|
0.46770717 1. 0.56407607 0.66143783]
|
|
|
|
mean value: 0.7373450632934155
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.9375 0.86666667 0.86666667 0.86666667 0.86666667
|
|
0.66666667 1. 0.73333333 0.8 ]
|
|
|
|
mean value: 0.8479166666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.93333333 0.83333333 0.83333333 0.83333333 0.83333333
|
|
0.54545455 1. 0.66666667 0.76923077]
|
|
|
|
mean value: 0.8105161505161504
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.875 0.71428571 0.71428571 0.71428571 0.71428571
|
|
0.375 1. 0.5 0.625 ]
|
|
|
|
mean value: 0.6982142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.9375 0.85714286 0.85714286 0.85714286 0.85714286
|
|
0.6875 1. 0.75 0.8125 ]
|
|
|
|
mean value: 0.8491071428571428
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.875 0.71428571 0.71428571 0.71428571 0.71428571
|
|
0.375 1. 0.5 0.625 ]
|
|
|
|
mean value: 0.6982142857142857
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.99
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03337741 0.03868771 0.03221297 0.03223085 0.03231907 0.03239298
|
|
0.032233 0.03233957 0.03223705 0.03226876]
|
|
|
|
mean value: 0.033029937744140626
|
|
|
|
key: score_time
|
|
value: [0.02033663 0.0221262 0.01984525 0.02155614 0.01149225 0.02050543
|
|
0.02263975 0.01156116 0.02123237 0.02290106]
|
|
|
|
mean value: 0.01941962242126465
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.75 0.66143783 1. 0.66143783 0.73214286
|
|
0.87287156 0.87287156 0.60714286 0.87287156]
|
|
|
|
mean value: 0.7912693156338115
|
|
|
|
key: train_mcc
|
|
value: [0.97058824 0.98540068 0.98550725 0.97080136 0.97080136 0.97080136
|
|
0.97080136 0.97080136 0.98550418 0.97080136]
|
|
|
|
mean value: 0.975180852135241
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.875 0.8 1. 0.8 0.86666667
|
|
0.93333333 0.93333333 0.8 0.93333333]
|
|
|
|
mean value: 0.8879166666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.98529412 0.99264706 0.99270073 0.98540146 0.98540146 0.98540146
|
|
0.98540146 0.98540146 0.99270073 0.98540146]
|
|
|
|
mean value: 0.9875751395448691
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.875 0.82352941 1. 0.82352941 0.85714286
|
|
0.94117647 0.94117647 0.8 0.94117647]
|
|
|
|
mean value: 0.8936064425770308
|
|
|
|
key: train_fscore
|
|
value: [0.98529412 0.99270073 0.99270073 0.98550725 0.98550725 0.98550725
|
|
0.98529412 0.98529412 0.99259259 0.98529412]
|
|
|
|
mean value: 0.9875692262165278
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 0.7 1. 0.7 0.85714286
|
|
0.88888889 0.88888889 0.85714286 0.88888889]
|
|
|
|
mean value: 0.8655952380952381
|
|
|
|
key: train_precision
|
|
value: [0.98529412 0.98550725 1. 0.98550725 0.98550725 0.98550725
|
|
0.98529412 0.98529412 1. 0.98529412]
|
|
|
|
mean value: 0.9883205456095482
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 1. 1. 0.85714286
|
|
1. 1. 0.75 1. ]
|
|
|
|
mean value: 0.9357142857142857
|
|
|
|
key: train_recall
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./gid_orig.py:155: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_orig.py:158: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
ros_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.98529412 1. 0.98550725 0.98550725 0.98550725 0.98550725
|
|
0.98529412 0.98529412 0.98529412 0.98529412]
|
|
|
|
mean value: 0.9868499573742541
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.875 0.8125 1. 0.8125 0.86607143
|
|
0.92857143 0.92857143 0.80357143 0.92857143]
|
|
|
|
mean value: 0.8892857142857143
|
|
|
|
key: train_roc_auc
|
|
value: [0.98529412 0.99264706 0.99275362 0.98540068 0.98540068 0.98540068
|
|
0.98540068 0.98540068 0.99264706 0.98540068]
|
|
|
|
mean value: 0.9875745950554136
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.77777778 0.7 1. 0.7 0.75
|
|
0.88888889 0.88888889 0.66666667 0.88888889]
|
|
|
|
mean value: 0.8136111111111111
|
|
|
|
key: train_jcc
|
|
value: [0.97101449 0.98550725 0.98550725 0.97142857 0.97142857 0.97142857
|
|
0.97101449 0.97101449 0.98529412 0.97101449]
|
|
|
|
mean value: 0.975465229570089
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.21865153 0.19576311 0.21859837 0.21810484 0.19659543 0.20908713
|
|
0.1991601 0.19221115 0.19844651 0.20431876]
|
|
|
|
mean value: 0.20509369373321534
|
|
|
|
key: score_time
|
|
value: [0.02091098 0.02276993 0.0219698 0.0205822 0.02166986 0.01975226
|
|
0.02068043 0.02233529 0.02191591 0.02306247]
|
|
|
|
mean value: 0.02156491279602051
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.75 0.66143783 1. 0.66143783 0.49099025
|
|
0.6000992 0.87287156 0.60714286 0.87287156]
|
|
|
|
mean value: 0.7398768189431251
|
|
|
|
key: train_mcc
|
|
value: [0.97058824 0.98540068 0.98550725 0.98550418 0.97080136 1.
|
|
0.98550725 0.97080136 0.98550418 0.97080136]
|
|
|
|
mean value: 0.9810415854946039
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.875 0.8 1. 0.8 0.73333333
|
|
0.8 0.93333333 0.8 0.93333333]
|
|
|
|
mean value: 0.8612500000000001
|
|
|
|
key: train_accuracy
|
|
value: [0.98529412 0.99264706 0.99270073 0.99270073 0.98540146 1.
|
|
0.99270073 0.98540146 0.99270073 0.98540146]
|
|
|
|
mean value: 0.9904948475740661
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.875 0.82352941 1. 0.82352941 0.75
|
|
0.82352941 0.94117647 0.8 0.94117647]
|
|
|
|
mean value: 0.8711274509803921
|
|
|
|
key: train_fscore
|
|
value: [0.98529412 0.99270073 0.99270073 0.99280576 0.98550725 1.
|
|
0.99270073 0.98529412 0.99259259 0.98529412]
|
|
|
|
mean value: 0.9904890137087287
|
|
|
|
key: test_precision
|
|
value: [1. 0.875 0.7 1. 0.7 0.66666667
|
|
0.77777778 0.88888889 0.85714286 0.88888889]
|
|
|
|
mean value: 0.8354365079365079
|
|
|
|
key: train_precision
|
|
value: [0.98529412 0.98550725 1. 0.98571429 0.98550725 1.
|
|
0.98550725 0.98529412 1. 0.98529412]
|
|
|
|
mean value: 0.9898118377785897
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 1. 1. 0.85714286
|
|
0.875 1. 0.75 1. ]
|
|
|
|
mean value: 0.9232142857142858
|
|
|
|
key: train_recall
|
|
value: [0.98529412 1. 0.98550725 1. 0.98550725 1.
|
|
1. 0.98529412 0.98529412 0.98529412]
|
|
|
|
mean value: 0.9912190963341859
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.875 0.8125 1. 0.8125 0.74107143
|
|
0.79464286 0.92857143 0.80357143 0.92857143]
|
|
|
|
mean value: 0.8633928571428572
|
|
|
|
key: train_roc_auc
|
|
value: [0.98529412 0.99264706 0.99275362 0.99264706 0.98540068 1.
|
|
0.99275362 0.98540068 0.99264706 0.98540068]
|
|
|
|
mean value: 0.9904944586530264
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.77777778 0.7 1. 0.7 0.6
|
|
0.7 0.88888889 0.66666667 0.88888889]
|
|
|
|
mean value: 0.7797222222222222
|
|
|
|
key: train_jcc
|
|
value: [0.97101449 0.98550725 0.98550725 0.98571429 0.97142857 1.
|
|
0.98550725 0.97101449 0.98529412 0.97101449]
|
|
|
|
mean value: 0.9812002192181221
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02503753 0.02437925 0.0221231 0.02506065 0.02024388 0.02260089
|
|
0.02252674 0.0209341 0.02013969 0.02230787]
|
|
|
|
mean value: 0.022535371780395507
|
|
|
|
key: score_time
|
|
value: [0.01144624 0.01141882 0.01129723 0.0114677 0.01127219 0.01128459
|
|
0.01127362 0.01134109 0.01124883 0.01125479]
|
|
|
|
mean value: 0.011330509185791015
|
|
|
|
key: test_mcc
|
|
value: [0.31622777 0.15811388 0.8 0.5976143 0.63245553 0.8
|
|
0.25819889 0.5 0. 0.77459667]
|
|
|
|
mean value: 0.4837207044714774
|
|
|
|
key: train_mcc
|
|
value: [1. 0.92234997 0.92240216 0.94804318 0.8972297 0.87263594
|
|
0.94871795 0.97467943 0.92338052 0.89861829]
|
|
|
|
mean value: 0.930805714729196
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.55555556 0.88888889 0.77777778 0.77777778 0.88888889
|
|
0.625 0.75 0.5 0.875 ]
|
|
|
|
mean value: 0.7305555555555555
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.96103896 0.96103896 0.97402597 0.94805195 0.93506494
|
|
0.97435897 0.98717949 0.96153846 0.94871795]
|
|
|
|
mean value: 0.9651015651015651
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.6 0.88888889 0.83333333 0.75 0.88888889
|
|
0.66666667 0.75 0.6 0.85714286]
|
|
|
|
mean value: 0.7406349206349206
|
|
|
|
key: train_fscore
|
|
value: [1. 0.96202532 0.96103896 0.97368421 0.94594595 0.93150685
|
|
0.97435897 0.98701299 0.96103896 0.94736842]
|
|
|
|
mean value: 0.9643980626745541
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.5 0.8 0.71428571 1. 1.
|
|
0.6 0.75 0.5 1. ]
|
|
|
|
mean value: 0.753095238095238
|
|
|
|
key: train_precision
|
|
value: [1. 0.95 0.97368421 0.97368421 0.97222222 0.97142857
|
|
0.97435897 1. 0.97368421 0.97297297]
|
|
|
|
mean value: 0.9762035372561688
|
|
|
|
key: test_recall
|
|
value: [0.5 0.75 1. 1. 0.6 0.8 0.75 0.75 0.75 0.75]
|
|
|
|
mean value: 0.765
|
|
|
|
key: train_recall
|
|
value: [1. 0.97435897 0.94871795 0.97368421 0.92105263 0.89473684
|
|
0.97435897 0.97435897 0.94871795 0.92307692]
|
|
|
|
mean value: 0.953306342780027
|
|
|
|
key: test_roc_auc
|
|
value: [0.65 0.575 0.9 0.75 0.8 0.9 0.625 0.75 0.5 0.875]
|
|
|
|
mean value: 0.7325
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.9608637 0.96120108 0.97402159 0.9477058 0.93454791
|
|
0.97435897 0.98717949 0.96153846 0.94871795]
|
|
|
|
mean value: 0.9650134952766531
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.42857143 0.8 0.71428571 0.6 0.8
|
|
0.5 0.6 0.42857143 0.75 ]
|
|
|
|
mean value: 0.6021428571428571
|
|
|
|
key: train_jcc
|
|
value: [1. 0.92682927 0.925 0.94871795 0.8974359 0.87179487
|
|
0.95 0.97435897 0.925 0.9 ]
|
|
|
|
mean value: 0.9319136960600375
|
|
|
|
MCC on Blind test: 0.09
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.67788839 0.4790628 0.51931477 0.50791335 0.656353 0.51453042
|
|
0.48985314 0.49819374 0.5512023 0.60133958]
|
|
|
|
mean value: 0.5495651483535766
|
|
|
|
key: score_time
|
|
value: [0.01164913 0.01161647 0.01164889 0.01170087 0.01168561 0.02243257
|
|
0.01179647 0.01173663 0.02173638 0.0123179 ]
|
|
|
|
mean value: 0.01383209228515625
|
|
|
|
key: test_mcc
|
|
value: [0.35 0.47809144 0.8 0.5976143 0.63245553 0.1
|
|
0.57735027 0.5 0.5 0.77459667]
|
|
|
|
mean value: 0.5310108218865739
|
|
|
|
key: train_mcc
|
|
value: [1. 0.61039852 1. 1. 0.97434188 0.48234809
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9067088488928768
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.66666667 0.88888889 0.77777778 0.77777778 0.55555556
|
|
0.75 0.75 0.75 0.875 ]
|
|
|
|
mean value: 0.7458333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.80519481 1. 1. 0.98701299 0.74025974
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9532467532467532
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.72727273 0.88888889 0.83333333 0.75 0.6
|
|
0.8 0.75 0.75 0.85714286]
|
|
|
|
mean value: 0.7623304473304473
|
|
|
|
key: train_fscore
|
|
value: [1. 0.81012658 1. 1. 0.98666667 0.72222222
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.951901547116737
|
|
|
|
key: test_precision
|
|
value: [0.6 0.57142857 0.8 0.71428571 1. 0.6
|
|
0.66666667 0.75 0.75 1. ]
|
|
|
|
mean value: 0.7452380952380953
|
|
|
|
key: train_precision
|
|
value: [1. 0.8 1. 1. 1. 0.76470588
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9564705882352941
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 1. 1. 0.6 0.6 1. 0.75 0.75 0.75]
|
|
|
|
mean value: 0.82
|
|
|
|
key: train_recall
|
|
value: [1. 0.82051282 1. 1. 0.97368421 0.68421053
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9478407557354925
|
|
|
|
key: test_roc_auc
|
|
value: [0.675 0.7 0.9 0.75 0.8 0.55 0.75 0.75 0.75 0.875]
|
|
|
|
mean value: 0.75
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.80499325 1. 1. 0.98684211 0.73954116
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9531376518218624
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.57142857 0.8 0.71428571 0.6 0.42857143
|
|
0.66666667 0.6 0.6 0.75 ]
|
|
|
|
mean value: 0.623095238095238
|
|
|
|
key: train_jcc
|
|
value: [1. 0.68085106 1. 1. 0.97368421 0.56521739
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9219752665660451
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01265955 0.01204038 0.0094521 0.00962925 0.00864935 0.00926232
|
|
0.00898719 0.00849867 0.00921798 0.00849581]
|
|
|
|
mean value: 0.009689259529113769
|
|
|
|
key: score_time
|
|
value: [0.01206136 0.00932217 0.00953627 0.00923824 0.00897503 0.00906634
|
|
0.00922036 0.00909305 0.00911117 0.00872612]
|
|
|
|
mean value: 0.009435009956359864
|
|
|
|
key: test_mcc
|
|
value: [-0.15811388 0.31622777 0.63245553 0.39528471 0.1 0.1
|
|
0.37796447 0.25819889 0.25819889 0.57735027]
|
|
|
|
mean value: 0.28575666442563175
|
|
|
|
key: train_mcc
|
|
value: [0.66849369 0.50674764 0.53342348 0.53279352 0.53591229 0.5064147
|
|
0.7073494 0.56428809 0.68516016 0.4897623 ]
|
|
|
|
mean value: 0.5730345270522292
|
|
|
|
key: test_accuracy
|
|
value: [0.44444444 0.55555556 0.77777778 0.66666667 0.55555556 0.55555556
|
|
0.625 0.625 0.625 0.75 ]
|
|
|
|
mean value: 0.6180555555555556
|
|
|
|
key: train_accuracy
|
|
value: [0.83116883 0.75324675 0.76623377 0.76623377 0.76623377 0.75324675
|
|
0.84615385 0.78205128 0.83333333 0.74358974]
|
|
|
|
mean value: 0.7841491841491842
|
|
|
|
key: test_fscore
|
|
value: [0.28571429 0.66666667 0.8 0.76923077 0.6 0.6
|
|
0.72727273 0.66666667 0.57142857 0.66666667]
|
|
|
|
mean value: 0.6353646353646354
|
|
|
|
key: train_fscore
|
|
value: [0.82191781 0.75324675 0.76315789 0.75675676 0.775 0.74666667
|
|
0.82857143 0.77922078 0.8115942 0.75609756]
|
|
|
|
mean value: 0.7792229851292566
|
|
|
|
key: test_precision
|
|
value: [0.33333333 0.5 0.66666667 0.625 0.6 0.6
|
|
0.57142857 0.6 0.66666667 1. ]
|
|
|
|
mean value: 0.6163095238095238
|
|
|
|
key: train_precision
|
|
value: [0.88235294 0.76315789 0.78378378 0.77777778 0.73809524 0.75675676
|
|
0.93548387 0.78947368 0.93333333 0.72093023]
|
|
|
|
mean value: 0.808114551339661
|
|
|
|
key: test_recall
|
|
value: [0.25 1. 1. 1. 0.6 0.6 1. 0.75 0.5 0.5 ]
|
|
|
|
mean value: 0.72
|
|
|
|
key: train_recall
|
|
value: [0.76923077 0.74358974 0.74358974 0.73684211 0.81578947 0.73684211
|
|
0.74358974 0.76923077 0.71794872 0.79487179]
|
|
|
|
mean value: 0.7571524966261809
|
|
|
|
key: test_roc_auc
|
|
value: [0.425 0.6 0.8 0.625 0.55 0.55 0.625 0.625 0.625 0.75 ]
|
|
|
|
mean value: 0.6175
|
|
|
|
key: train_roc_auc
|
|
value: [0.83198381 0.75337382 0.76653171 0.76585695 0.7668691 0.75303644
|
|
0.84615385 0.78205128 0.83333333 0.74358974]
|
|
|
|
mean value: 0.7842780026990553
|
|
|
|
key: test_jcc
|
|
value: [0.16666667 0.5 0.66666667 0.625 0.42857143 0.42857143
|
|
0.57142857 0.5 0.4 0.5 ]
|
|
|
|
mean value: 0.47869047619047617
|
|
|
|
key: train_jcc
|
|
value: [0.69767442 0.60416667 0.61702128 0.60869565 0.63265306 0.59574468
|
|
0.70731707 0.63829787 0.68292683 0.60784314]
|
|
|
|
mean value: 0.6392340668150881
|
|
|
|
MCC on Blind test: 0.02
|
|
|
|
Accuracy on Blind test: 0.48
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00923014 0.0090003 0.00944948 0.00926304 0.0088408 0.00969529
|
|
0.00979328 0.00931954 0.00897288 0.00965595]
|
|
|
|
mean value: 0.009322071075439453
|
|
|
|
key: score_time
|
|
value: [0.00919342 0.0087328 0.00874853 0.00913787 0.00951767 0.00950384
|
|
0.00897765 0.00919819 0.00955391 0.00890183]
|
|
|
|
mean value: 0.009146571159362793
|
|
|
|
key: test_mcc
|
|
value: [ 0.5976143 0.55 0.8 0.15811388 0.15811388 0.05976143
|
|
-0.57735027 0.25819889 -0.25819889 0.37796447]
|
|
|
|
mean value: 0.2124217704970356
|
|
|
|
key: train_mcc
|
|
value: [0.74021592 0.71670195 0.68898046 0.68898046 0.71987403 0.64957894
|
|
0.72392277 0.66864785 0.72980045 0.76948376]
|
|
|
|
mean value: 0.7096186585687321
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.77777778 0.88888889 0.55555556 0.55555556 0.55555556
|
|
0.25 0.625 0.375 0.625 ]
|
|
|
|
mean value: 0.5986111111111111
|
|
|
|
key: train_accuracy
|
|
value: [0.87012987 0.85714286 0.84415584 0.84415584 0.85714286 0.81818182
|
|
0.85897436 0.83333333 0.85897436 0.88461538]
|
|
|
|
mean value: 0.8526806526806526
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.75 0.88888889 0.5 0.5 0.66666667
|
|
0. 0.57142857 0.44444444 0.4 ]
|
|
|
|
mean value: 0.5388095238095238
|
|
|
|
key: train_fscore
|
|
value: [0.87179487 0.85333333 0.85 0.83783784 0.84507042 0.79411765
|
|
0.84931507 0.82666667 0.84507042 0.88607595]
|
|
|
|
mean value: 0.8459282219622195
|
|
|
|
key: test_precision
|
|
value: [1. 0.75 0.8 0.66666667 0.66666667 0.57142857
|
|
0. 0.66666667 0.4 1. ]
|
|
|
|
mean value: 0.6521428571428571
|
|
|
|
key: train_precision
|
|
value: [0.87179487 0.88888889 0.82926829 0.86111111 0.90909091 0.9
|
|
0.91176471 0.86111111 0.9375 0.875 ]
|
|
|
|
mean value: 0.8845529890562172
|
|
|
|
key: test_recall
|
|
value: [0.5 0.75 1. 0.4 0.4 0.8 0. 0.5 0.5 0.25]
|
|
|
|
mean value: 0.51
|
|
|
|
key: train_recall
|
|
value: [0.87179487 0.82051282 0.87179487 0.81578947 0.78947368 0.71052632
|
|
0.79487179 0.79487179 0.76923077 0.8974359 ]
|
|
|
|
mean value: 0.8136302294197031
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.775 0.9 0.575 0.575 0.525 0.25 0.625 0.375 0.625]
|
|
|
|
mean value: 0.5975
|
|
|
|
key: train_roc_auc
|
|
value: [0.87010796 0.85762483 0.84379217 0.84379217 0.8562753 0.81680162
|
|
0.85897436 0.83333333 0.85897436 0.88461538]
|
|
|
|
mean value: 0.8524291497975709
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.6 0.8 0.33333333 0.33333333 0.5
|
|
0. 0.4 0.28571429 0.25 ]
|
|
|
|
mean value: 0.4002380952380952
|
|
|
|
key: train_jcc
|
|
value: [0.77272727 0.74418605 0.73913043 0.72093023 0.73170732 0.65853659
|
|
0.73809524 0.70454545 0.73170732 0.79545455]
|
|
|
|
mean value: 0.7337020444187082
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
[('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00935268 0.0094099 0.00923061 0.00814486 0.00917649 0.00927663
|
|
0.00916672 0.00931597 0.00929832 0.00910544]
|
|
|
|
mean value: 0.0091477632522583
|
|
|
|
key: score_time
|
|
value: [0.0149107 0.0150919 0.01361489 0.0100975 0.01014137 0.01008272
|
|
0.01007795 0.01012373 0.01002026 0.01466894]
|
|
|
|
mean value: 0.011882996559143067
|
|
|
|
key: test_mcc
|
|
value: [ 0. 0.55 0.8 0.55 0.31622777 0.31622777
|
|
-0.37796447 0.25819889 0.57735027 0.25819889]
|
|
|
|
mean value: 0.32482391077083966
|
|
|
|
key: train_mcc
|
|
value: [0.45639039 0.48741471 0.53591229 0.53238866 0.63928106 0.48977837
|
|
0.64442408 0.46291005 0.48782136 0.36004115]
|
|
|
|
mean value: 0.5096362111380015
|
|
|
|
key: test_accuracy
|
|
value: [0.55555556 0.77777778 0.88888889 0.77777778 0.55555556 0.66666667
|
|
0.375 0.625 0.75 0.625 ]
|
|
|
|
mean value: 0.6597222222222222
|
|
|
|
key: train_accuracy
|
|
value: [0.72727273 0.74025974 0.76623377 0.76623377 0.81818182 0.74025974
|
|
0.82051282 0.73076923 0.74358974 0.67948718]
|
|
|
|
mean value: 0.7532800532800533
|
|
|
|
key: test_fscore
|
|
value: [0. 0.75 0.88888889 0.8 0.33333333 0.72727273
|
|
0.54545455 0.66666667 0.66666667 0.57142857]
|
|
|
|
mean value: 0.5949711399711399
|
|
|
|
key: train_fscore
|
|
value: [0.72 0.72222222 0.75675676 0.76315789 0.80555556 0.70588235
|
|
0.81081081 0.72 0.73684211 0.66666667]
|
|
|
|
mean value: 0.7407894364953188
|
|
|
|
key: test_precision
|
|
value: [0. 0.75 0.8 0.8 1. 0.66666667
|
|
0.42857143 0.6 1. 0.66666667]
|
|
|
|
mean value: 0.6711904761904762
|
|
|
|
key: train_precision
|
|
value: [0.75 0.78787879 0.8 0.76315789 0.85294118 0.8
|
|
0.85714286 0.75 0.75675676 0.69444444]
|
|
|
|
mean value: 0.7812321917430276
|
|
|
|
key: test_recall
|
|
value: [0. 0.75 1. 0.8 0.2 0.8 0.75 0.75 0.5 0.5 ]
|
|
|
|
mean value: 0.605
|
|
|
|
key: train_recall
|
|
value: [0.69230769 0.66666667 0.71794872 0.76315789 0.76315789 0.63157895
|
|
0.76923077 0.69230769 0.71794872 0.64102564]
|
|
|
|
mean value: 0.7055330634278003
|
|
|
|
key: test_roc_auc
|
|
value: [0.5 0.775 0.9 0.775 0.6 0.65 0.375 0.625 0.75 0.625]
|
|
|
|
mean value: 0.6575
|
|
|
|
key: train_roc_auc
|
|
value: [0.72773279 0.74122807 0.7668691 0.76619433 0.81747638 0.7388664
|
|
0.82051282 0.73076923 0.74358974 0.67948718]
|
|
|
|
mean value: 0.7532726045883941
|
|
|
|
key: test_jcc
|
|
value: [0. 0.6 0.8 0.66666667 0.2 0.57142857
|
|
0.375 0.5 0.5 0.4 ]
|
|
|
|
mean value: 0.46130952380952384
|
|
|
|
key: train_jcc
|
|
value: [0.5625 0.56521739 0.60869565 0.61702128 0.6744186 0.54545455
|
|
0.68181818 0.5625 0.58333333 0.5 ]
|
|
|
|
mean value: 0.5900958985331228
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.57
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0090909 0.00947165 0.0086453 0.01064086 0.00960255 0.00955296
|
|
0.00961971 0.00936699 0.00874853 0.00870728]
|
|
|
|
mean value: 0.009344673156738282
|
|
|
|
key: score_time
|
|
value: [0.01019144 0.00849819 0.00846887 0.00918436 0.00922036 0.00914359
|
|
0.00917006 0.00848866 0.00857306 0.00854564]
|
|
|
|
mean value: 0.008948421478271485
|
|
|
|
key: test_mcc
|
|
value: [0.39528471 0.47809144 0.8 0.31622777 0.47809144 0.31622777
|
|
0. 0.5 0. 0.57735027]
|
|
|
|
mean value: 0.3861273396211864
|
|
|
|
key: train_mcc
|
|
value: [0.84537494 0.79217274 0.81836616 0.87035806 0.82046748 0.71613058
|
|
0.84615385 0.8229512 0.84615385 0.8720816 ]
|
|
|
|
mean value: 0.8250210452339253
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.66666667 0.88888889 0.66666667 0.66666667 0.66666667
|
|
0.5 0.75 0.5 0.75 ]
|
|
|
|
mean value: 0.6722222222222222
|
|
|
|
key: train_accuracy
|
|
value: [0.92207792 0.8961039 0.90909091 0.93506494 0.90909091 0.85714286
|
|
0.92307692 0.91025641 0.92307692 0.93589744]
|
|
|
|
mean value: 0.9120879120879121
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.72727273 0.88888889 0.72727273 0.57142857 0.72727273
|
|
0.5 0.75 0.5 0.66666667]
|
|
|
|
mean value: 0.6458802308802308
|
|
|
|
key: train_fscore
|
|
value: [0.92105263 0.8974359 0.91139241 0.93333333 0.90410959 0.84931507
|
|
0.92307692 0.90666667 0.92307692 0.93670886]
|
|
|
|
mean value: 0.9106168298525722
|
|
|
|
key: test_precision
|
|
value: [1. 0.57142857 0.8 0.66666667 1. 0.66666667
|
|
0.5 0.75 0.5 1. ]
|
|
|
|
mean value: 0.7454761904761905
|
|
|
|
key: train_precision
|
|
value: [0.94594595 0.8974359 0.9 0.94594595 0.94285714 0.88571429
|
|
0.92307692 0.94444444 0.92307692 0.925 ]
|
|
|
|
mean value: 0.9233497508497509
|
|
|
|
key: test_recall
|
|
value: [0.25 1. 1. 0.8 0.4 0.8 0.5 0.75 0.5 0.5 ]
|
|
|
|
mean value: 0.65
|
|
|
|
key: train_recall
|
|
value: [0.8974359 0.8974359 0.92307692 0.92105263 0.86842105 0.81578947
|
|
0.92307692 0.87179487 0.92307692 0.94871795]
|
|
|
|
mean value: 0.8989878542510121
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.7 0.9 0.65 0.7 0.65 0.5 0.75 0.5 0.75 ]
|
|
|
|
mean value: 0.6725
|
|
|
|
key: train_roc_auc
|
|
value: [0.92240216 0.89608637 0.90890688 0.93488529 0.9085695 0.85661269
|
|
0.92307692 0.91025641 0.92307692 0.93589744]
|
|
|
|
mean value: 0.9119770580296896
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.57142857 0.8 0.57142857 0.4 0.57142857
|
|
0.33333333 0.6 0.33333333 0.5 ]
|
|
|
|
mean value: 0.4930952380952381
|
|
|
|
key: train_jcc
|
|
value: [0.85365854 0.81395349 0.8372093 0.875 0.825 0.73809524
|
|
0.85714286 0.82926829 0.85714286 0.88095238]
|
|
|
|
mean value: 0.83674229532993
|
|
|
|
MCC on Blind test: 0.09
|
|
|
|
Accuracy on Blind test: 0.54
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.32213855 0.34692574 0.33995461 0.34015656 0.43762326 0.43245387
|
|
0.34698081 0.32564402 0.34216499 0.35122037]
|
|
|
|
mean value: 0.35852627754211425
|
|
|
|
key: score_time
|
|
value: [0.01188755 0.01189065 0.0119307 0.0119338 0.01207781 0.0119822
|
|
0.01191235 0.01189542 0.01186323 0.01194644]
|
|
|
|
mean value: 0.011932015419006348
|
|
|
|
key: test_mcc
|
|
value: [0.1 0.47809144 0.8 0.5976143 0.55 0.5976143
|
|
0. 0.5 0.25819889 0.77459667]
|
|
|
|
mean value: 0.46561156120567954
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.55555556 0.66666667 0.88888889 0.77777778 0.77777778 0.77777778
|
|
0.5 0.75 0.625 0.875 ]
|
|
|
|
mean value: 0.7194444444444444
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.72727273 0.88888889 0.83333333 0.8 0.83333333
|
|
0.6 0.75 0.57142857 0.85714286]
|
|
|
|
mean value: 0.7361399711399712
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 0.57142857 0.8 0.71428571 0.8 0.71428571
|
|
0.5 0.75 0.66666667 1. ]
|
|
|
|
mean value: 0.7016666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.5 1. 1. 1. 0.8 1. 0.75 0.75 0.5 0.75]
|
|
|
|
mean value: 0.805
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.55 0.7 0.9 0.75 0.775 0.75 0.5 0.75 0.625 0.875]
|
|
|
|
mean value: 0.7175
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.57142857 0.8 0.71428571 0.66666667 0.71428571
|
|
0.42857143 0.6 0.4 0.75 ]
|
|
|
|
mean value: 0.5978571428571429
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01298523 0.01243281 0.00981116 0.00963116 0.00943565 0.009233
|
|
0.00946522 0.00921392 0.0091486 0.00971317]
|
|
|
|
mean value: 0.010106992721557618
|
|
|
|
key: score_time
|
|
value: [0.01131511 0.00947094 0.0087316 0.00852537 0.00846863 0.00840878
|
|
0.00836754 0.00833654 0.00854063 0.00851274]
|
|
|
|
mean value: 0.008867788314819335
|
|
|
|
key: test_mcc
|
|
value: [0.55 0.8 0.55 1. 0.8 1.
|
|
0.57735027 0.77459667 0.57735027 1. ]
|
|
|
|
mean value: 0.7629297207620735
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.88888889 0.77777778 1. 0.88888889 1.
|
|
0.75 0.875 0.75 1. ]
|
|
|
|
mean value: 0.8708333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.88888889 0.75 1. 0.88888889 1.
|
|
0.8 0.85714286 0.66666667 1. ]
|
|
|
|
mean value: 0.8601587301587301
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.8 0.75 1. 1. 1.
|
|
0.66666667 1. 1. 1. ]
|
|
|
|
mean value: 0.8966666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 0.75 1. 0.8 1. 1. 0.75 0.5 1. ]
|
|
|
|
mean value: 0.855
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.775 0.9 0.775 1. 0.9 1. 0.75 0.875 0.75 1. ]
|
|
|
|
mean value: 0.8725
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.8 0.6 1. 0.8 1.
|
|
0.66666667 0.75 0.5 1. ]
|
|
|
|
mean value: 0.7716666666666667
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08166194 0.08092213 0.08124018 0.0808332 0.08057642 0.08089733
|
|
0.08081889 0.08135033 0.08086824 0.08122468]
|
|
|
|
mean value: 0.08103933334350585
|
|
|
|
key: score_time
|
|
value: [0.01668835 0.0169456 0.01684809 0.01695967 0.01681328 0.01679158
|
|
0.01694441 0.01681828 0.0169208 0.01684427]
|
|
|
|
mean value: 0.016857433319091796
|
|
|
|
key: test_mcc
|
|
value: [0.55 1. 0.8 0.31622777 0.35 0.79056942
|
|
0. 0.5 0.25819889 0.77459667]
|
|
|
|
mean value: 0.5339592740047577
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 1. 0.88888889 0.66666667 0.66666667 0.88888889
|
|
0.5 0.75 0.625 0.875 ]
|
|
|
|
mean value: 0.7638888888888888
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 1. 0.88888889 0.72727273 0.66666667 0.90909091
|
|
0.6 0.75 0.57142857 0.85714286]
|
|
|
|
mean value: 0.772049062049062
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 1. 0.8 0.66666667 0.75 0.83333333
|
|
0.5 0.75 0.66666667 1. ]
|
|
|
|
mean value: 0.7716666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 1. 0.8 0.6 1. 0.75 0.75 0.5 0.75]
|
|
|
|
mean value: 0.79
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.775 1. 0.9 0.65 0.675 0.875 0.5 0.75 0.625 0.875]
|
|
|
|
mean value: 0.7625000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6 1. 0.8 0.57142857 0.5 0.83333333
|
|
0.42857143 0.6 0.4 0.75 ]
|
|
|
|
mean value: 0.6483333333333333
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.01
|
|
|
|
Accuracy on Blind test: 0.6
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00858474 0.00827479 0.00837827 0.00822592 0.00829554 0.0083642
|
|
0.00828242 0.00841022 0.00844193 0.00838256]
|
|
|
|
mean value: 0.008364057540893555
|
|
|
|
key: score_time
|
|
value: [0.00838494 0.00850153 0.00845408 0.00845599 0.00838614 0.00832748
|
|
0.00842834 0.00836754 0.00846052 0.00834703]
|
|
|
|
mean value: 0.008411359786987305
|
|
|
|
key: test_mcc
|
|
value: [ 0.1 -0.05976143 0.55 0.5976143 0.8 0.5976143
|
|
-0.25819889 0. 0. 0. ]
|
|
|
|
mean value: 0.2327268289120513
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.55555556 0.44444444 0.77777778 0.77777778 0.88888889 0.77777778
|
|
0.375 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.6097222222222223
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.5 0.54545455 0.75 0.83333333 0.88888889 0.83333333
|
|
0.44444444 0.5 0.5 0.33333333]
|
|
|
|
mean value: 0.6128787878787879
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 0.42857143 0.75 0.71428571 1. 0.71428571
|
|
0.4 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.6007142857142858
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.5 0.75 0.75 1. 0.8 1. 0.5 0.5 0.5 0.25]
|
|
|
|
mean value: 0.655
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.55 0.475 0.775 0.75 0.9 0.75 0.375 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.6075
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.33333333 0.375 0.6 0.71428571 0.8 0.71428571
|
|
0.28571429 0.33333333 0.33333333 0.2 ]
|
|
|
|
mean value: 0.4689285714285714
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.03
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.01921344 1.02071166 1.02129936 1.0181725 1.01626253 1.0281322
|
|
1.03024292 1.02138448 1.02007842 1.02826262]
|
|
|
|
mean value: 1.022376012802124
|
|
|
|
key: score_time
|
|
value: [0.08772469 0.08730388 0.08674073 0.0869844 0.08715105 0.08721757
|
|
0.08726931 0.0868032 0.14008069 0.09193897]
|
|
|
|
mean value: 0.09292144775390625
|
|
|
|
key: test_mcc
|
|
value: [0.55 0.8 0.55 0.55 0.15811388 1.
|
|
0.25819889 0.77459667 0.57735027 0.77459667]
|
|
|
|
mean value: 0.5992856380428173
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.88888889 0.77777778 0.77777778 0.55555556 1.
|
|
0.625 0.875 0.75 0.875 ]
|
|
|
|
mean value: 0.7902777777777777
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.88888889 0.75 0.8 0.5 1.
|
|
0.66666667 0.85714286 0.66666667 0.85714286]
|
|
|
|
mean value: 0.7736507936507937
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.8 0.75 0.8 0.66666667 1.
|
|
0.6 1. 1. 1. ]
|
|
|
|
mean value: 0.8366666666666667
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 0.75 0.8 0.4 1. 0.75 0.75 0.5 0.75]
|
|
|
|
mean value: 0.745
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.775 0.9 0.775 0.775 0.575 1. 0.625 0.875 0.75 0.875]
|
|
|
|
mean value: 0.7925
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.8 0.6 0.66666667 0.33333333 1.
|
|
0.5 0.75 0.5 0.75 ]
|
|
|
|
mean value: 0.65
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.62
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.8344028 0.83407116 0.86222911 0.86600399 0.8268683 0.86827183
|
|
0.97598505 0.82194805 0.89098978 0.82547593]
|
|
|
|
mean value: 0.8606245994567872
|
|
|
|
key: score_time
|
|
value: [0.21745658 0.13854885 0.21888685 0.18107748 0.22129059 0.20861673
|
|
0.14700174 0.21403289 0.17643619 0.23291492]
|
|
|
|
mean value: 0.19562628269195556
|
|
|
|
key: test_mcc
|
|
value: [0.55 0.63245553 0.8 0.55 0.15811388 0.5976143
|
|
0. 0.5 0.25819889 0.77459667]
|
|
|
|
mean value: 0.4820979278697936
|
|
|
|
key: train_mcc
|
|
value: [0.94935876 0.97435897 0.94935876 0.94929201 0.97434188 0.97434188
|
|
0.94996791 0.94996791 0.97467943 0.97467943]
|
|
|
|
mean value: 0.9620346953307984
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.77777778 0.88888889 0.77777778 0.55555556 0.77777778
|
|
0.5 0.75 0.625 0.875 ]
|
|
|
|
mean value: 0.7305555555555555
|
|
|
|
key: train_accuracy
|
|
value: [0.97402597 0.98701299 0.97402597 0.97402597 0.98701299 0.98701299
|
|
0.97435897 0.97435897 0.98717949 0.98717949]
|
|
|
|
mean value: 0.9806193806193806
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.8 0.88888889 0.8 0.5 0.83333333
|
|
0.6 0.75 0.57142857 0.85714286]
|
|
|
|
mean value: 0.7350793650793651
|
|
|
|
key: train_fscore
|
|
value: [0.97368421 0.98701299 0.97368421 0.97297297 0.98666667 0.98666667
|
|
0.97368421 0.97368421 0.98701299 0.98701299]
|
|
|
|
mean value: 0.9802082109450531
|
|
|
|
key: test_precision
|
|
value: [0.75 0.66666667 0.8 0.8 0.66666667 0.71428571
|
|
0.5 0.75 0.66666667 1. ]
|
|
|
|
mean value: 0.7314285714285714
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 1. 0.8 0.4 1. 0.75 0.75 0.5 0.75]
|
|
|
|
mean value: 0.77
|
|
|
|
key: train_recall
|
|
value: [0.94871795 0.97435897 0.94871795 0.94736842 0.97368421 0.97368421
|
|
0.94871795 0.94871795 0.97435897 0.97435897]
|
|
|
|
mean value: 0.9612685560053981
|
|
|
|
key: test_roc_auc
|
|
value: [0.775 0.8 0.9 0.775 0.575 0.75 0.5 0.75 0.625 0.875]
|
|
|
|
mean value: 0.7325
|
|
|
|
key: train_roc_auc
|
|
value: [0.97435897 0.98717949 0.97435897 0.97368421 0.98684211 0.98684211
|
|
0.97435897 0.97435897 0.98717949 0.98717949]
|
|
|
|
mean value: 0.9806342780026991
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.66666667 0.8 0.66666667 0.33333333 0.71428571
|
|
0.42857143 0.6 0.4 0.75 ]
|
|
|
|
mean value: 0.5959523809523809
|
|
|
|
key: train_jcc
|
|
value: [0.94871795 0.97435897 0.94871795 0.94736842 0.97368421 0.97368421
|
|
0.94871795 0.94871795 0.97435897 0.97435897]
|
|
|
|
mean value: 0.9612685560053981
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02049661 0.00852871 0.0086658 0.00836873 0.00848603 0.00852489
|
|
0.00861835 0.00847673 0.00877643 0.00880885]
|
|
|
|
mean value: 0.009775114059448243
|
|
|
|
key: score_time
|
|
value: [0.01595473 0.00848293 0.00887704 0.00862145 0.00873375 0.00857711
|
|
0.00855875 0.00848603 0.00864792 0.00869274]
|
|
|
|
mean value: 0.009363245964050294
|
|
|
|
key: test_mcc
|
|
value: [ 0.5976143 0.55 0.8 0.15811388 0.15811388 0.05976143
|
|
-0.57735027 0.25819889 -0.25819889 0.37796447]
|
|
|
|
mean value: 0.2124217704970356
|
|
|
|
key: train_mcc
|
|
value: [0.74021592 0.71670195 0.68898046 0.68898046 0.71987403 0.64957894
|
|
0.72392277 0.66864785 0.72980045 0.76948376]
|
|
|
|
mean value: 0.7096186585687321
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.77777778 0.88888889 0.55555556 0.55555556 0.55555556
|
|
0.25 0.625 0.375 0.625 ]
|
|
|
|
mean value: 0.5986111111111111
|
|
|
|
key: train_accuracy
|
|
value: [0.87012987 0.85714286 0.84415584 0.84415584 0.85714286 0.81818182
|
|
0.85897436 0.83333333 0.85897436 0.88461538]
|
|
|
|
mean value: 0.8526806526806526
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.75 0.88888889 0.5 0.5 0.66666667
|
|
0. 0.57142857 0.44444444 0.4 ]
|
|
|
|
mean value: 0.5388095238095238
|
|
|
|
key: train_fscore
|
|
value: [0.87179487 0.85333333 0.85 0.83783784 0.84507042 0.79411765
|
|
0.84931507 0.82666667 0.84507042 0.88607595]
|
|
|
|
mean value: 0.8459282219622195
|
|
|
|
key: test_precision
|
|
value: [1. 0.75 0.8 0.66666667 0.66666667 0.57142857
|
|
0. 0.66666667 0.4 1. ]
|
|
|
|
mean value: 0.6521428571428571
|
|
|
|
key: train_precision
|
|
value: [0.87179487 0.88888889 0.82926829 0.86111111 0.90909091 0.9
|
|
0.91176471 0.86111111 0.9375 0.875 ]
|
|
|
|
mean value: 0.8845529890562172
|
|
|
|
key: test_recall
|
|
value: [0.5 0.75 1. 0.4 0.4 0.8 0. 0.5 0.5 0.25]
|
|
|
|
mean value: 0.51
|
|
|
|
key: train_recall
|
|
value: [0.87179487 0.82051282 0.87179487 0.81578947 0.78947368 0.71052632
|
|
0.79487179 0.79487179 0.76923077 0.8974359 ]
|
|
|
|
mean value: 0.8136302294197031
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.775 0.9 0.575 0.575 0.525 0.25 0.625 0.375 0.625]
|
|
|
|
mean value: 0.5975
|
|
|
|
key: train_roc_auc
|
|
value: [0.87010796 0.85762483 0.84379217 0.84379217 0.8562753 0.81680162
|
|
0.85897436 0.83333333 0.85897436 0.88461538]
|
|
|
|
mean value: 0.8524291497975709
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.6 0.8 0.33333333 0.33333333 0.5
|
|
0. 0.4 0.28571429 0.25 ]
|
|
|
|
mean value: 0.4002380952380952
|
|
|
|
key: train_jcc
|
|
value: [0.77272727 0.74418605 0.73913043 0.72093023 0.73170732 0.65853659
|
|
0.73809524 0.70454545 0.73170732 0.79545455]
|
|
|
|
mean value: 0.7337020444187082
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.0401094 0.03476858 0.03738213 0.0384171 0.03525496 0.03481197
|
|
0.03531337 0.03826714 0.0395844 0.03738236]
|
|
|
|
mean value: 0.03712913990020752
|
|
|
|
key: score_time
|
|
value: [0.01210713 0.01098847 0.01114774 0.0102675 0.01021433 0.0108428
|
|
0.01119161 0.01130676 0.01111627 0.01118207]
|
|
|
|
mean value: 0.01103646755218506
|
|
|
|
key: test_mcc
|
|
value: [0.8 0.8 1. 1. 0.8 1.
|
|
1. 1. 0.57735027 1. ]
|
|
|
|
mean value: 0.8977350269189626
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.88888889 0.88888889 1. 1. 0.88888889 1.
|
|
1. 1. 0.75 1. ]
|
|
|
|
mean value: 0.9416666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.88888889 0.88888889 1. 1. 0.88888889 1.
|
|
1. 1. 0.66666667 1. ]
|
|
|
|
mean value: 0.9333333333333333
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.8 0.8 1. 1. 1. 1. 1. 1. 1. 1. ]
|
|
|
|
mean value: 0.96
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 0.8 1. 1. 1. 0.5 1. ]
|
|
|
|
mean value: 0.93
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9 0.9 1. 1. 0.9 1. 1. 1. 0.75 1. ]
|
|
|
|
mean value: 0.9450000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.8 0.8 1. 1. 0.8 1. 1. 1. 0.5 1. ]
|
|
|
|
mean value: 0.89
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.71
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02817845 0.04007769 0.06127048 0.04485011 0.04049444 0.0356071
|
|
0.04591703 0.04527164 0.04855704 0.03276825]
|
|
|
|
mean value: 0.04229922294616699
|
|
|
|
key: score_time
|
|
value: [0.02117968 0.02192545 0.01181006 0.01930475 0.02256536 0.02384424
|
|
0.0226078 0.02120304 0.03123069 0.02361965]
|
|
|
|
mean value: 0.021929073333740234
|
|
|
|
key: test_mcc
|
|
value: [0.15811388 0.15811388 0.31622777 0.5976143 0.05976143 0.8
|
|
0.57735027 0.5 0.5 1. ]
|
|
|
|
mean value: 0.46671815363572183
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.55555556 0.55555556 0.66666667 0.77777778 0.55555556 0.88888889
|
|
0.75 0.75 0.75 1. ]
|
|
|
|
mean value: 0.725
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.6 0.6 0.57142857 0.83333333 0.66666667 0.88888889
|
|
0.8 0.75 0.75 1. ]
|
|
|
|
mean value: 0.746031746031746
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 0.5 0.66666667 0.71428571 0.57142857 1.
|
|
0.66666667 0.75 0.75 1. ]
|
|
|
|
mean value: 0.7119047619047619
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.5 1. 0.8 0.8 1. 0.75 0.75 1. ]
|
|
|
|
mean value: 0.81
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.575 0.575 0.65 0.75 0.525 0.9 0.75 0.75 0.75 1. ]
|
|
|
|
mean value: 0.7225
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.42857143 0.42857143 0.4 0.71428571 0.5 0.8
|
|
0.66666667 0.6 0.6 1. ]
|
|
|
|
mean value: 0.6138095238095238
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.03
|
|
|
|
Accuracy on Blind test: 0.5
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02220154 0.00995994 0.00930357 0.00952268 0.0093174 0.0093205
|
|
0.00861454 0.00954103 0.00942183 0.00962305]
|
|
|
|
mean value: 0.01068260669708252
|
|
|
|
key: score_time
|
|
value: [0.00999022 0.00950193 0.00910759 0.008986 0.00915051 0.00919533
|
|
0.00878143 0.00928998 0.00907397 0.00929141]
|
|
|
|
mean value: 0.009236836433410644
|
|
|
|
key: test_mcc
|
|
value: [0.39528471 0.47809144 1. 0.39528471 0.15811388 0.1
|
|
0. 0.25819889 0.25819889 0. ]
|
|
|
|
mean value: 0.3043172521278593
|
|
|
|
key: train_mcc
|
|
value: [0.53238866 0.42847503 0.40311986 0.37940693 0.53342348 0.40243088
|
|
0.48717949 0.46291005 0.53846154 0.46291005]
|
|
|
|
mean value: 0.46307059797406186
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.66666667 1. 0.66666667 0.55555556 0.55555556
|
|
0.5 0.625 0.625 0.5 ]
|
|
|
|
mean value: 0.6361111111111111
|
|
|
|
key: train_accuracy
|
|
value: [0.76623377 0.71428571 0.7012987 0.68831169 0.76623377 0.7012987
|
|
0.74358974 0.73076923 0.76923077 0.73076923]
|
|
|
|
mean value: 0.7312021312021312
|
|
|
|
key: test_fscore
|
|
value: [0.4 0.72727273 1. 0.76923077 0.5 0.6
|
|
0.6 0.57142857 0.57142857 0.33333333]
|
|
|
|
mean value: 0.6072693972693973
|
|
|
|
key: train_fscore
|
|
value: [0.76923077 0.71794872 0.71604938 0.7 0.76923077 0.69333333
|
|
0.74358974 0.72 0.76923077 0.74074074]
|
|
|
|
mean value: 0.7339354226020893
|
|
|
|
key: test_precision
|
|
value: [1. 0.57142857 1. 0.625 0.66666667 0.6
|
|
0.5 0.66666667 0.66666667 0.5 ]
|
|
|
|
mean value: 0.6796428571428571
|
|
|
|
key: train_precision
|
|
value: [0.76923077 0.71794872 0.69047619 0.66666667 0.75 0.7027027
|
|
0.74358974 0.75 0.76923077 0.71428571]
|
|
|
|
mean value: 0.7274131274131275
|
|
|
|
key: test_recall
|
|
value: [0.25 1. 1. 1. 0.4 0.6 0.75 0.5 0.5 0.25]
|
|
|
|
mean value: 0.625
|
|
|
|
key: train_recall
|
|
value: [0.76923077 0.71794872 0.74358974 0.73684211 0.78947368 0.68421053
|
|
0.74358974 0.69230769 0.76923077 0.76923077]
|
|
|
|
mean value: 0.7415654520917679
|
|
|
|
key: test_roc_auc
|
|
value: [0.625 0.7 1. 0.625 0.575 0.55 0.5 0.625 0.625 0.5 ]
|
|
|
|
mean value: 0.6325
|
|
|
|
key: train_roc_auc
|
|
value: [0.76619433 0.71423752 0.70074224 0.68893387 0.76653171 0.70107962
|
|
0.74358974 0.73076923 0.76923077 0.73076923]
|
|
|
|
mean value: 0.7312078272604589
|
|
|
|
key: test_jcc
|
|
value: [0.25 0.57142857 1. 0.625 0.33333333 0.42857143
|
|
0.42857143 0.4 0.4 0.2 ]
|
|
|
|
mean value: 0.4636904761904762
|
|
|
|
key: train_jcc
|
|
value: [0.625 0.56 0.55769231 0.53846154 0.625 0.53061224
|
|
0.59183673 0.5625 0.625 0.58823529]
|
|
|
|
mean value: 0.580433811986333
|
|
|
|
MCC on Blind test: 0.09
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01010633 0.01337552 0.01346779 0.01255846 0.01274538 0.01246047
|
|
0.01407218 0.01253891 0.01436639 0.01313567]
|
|
|
|
mean value: 0.012882709503173828
|
|
|
|
key: score_time
|
|
value: [0.00920749 0.01127768 0.01129055 0.01131392 0.01129127 0.0113771
|
|
0.01156116 0.01149035 0.0113523 0.01129508]
|
|
|
|
mean value: 0.011145687103271485
|
|
|
|
key: test_mcc
|
|
value: [0.31622777 0.63245553 0.8 0.39528471 0.63245553 0.79056942
|
|
0.57735027 0.25819889 0.5 0.77459667]
|
|
|
|
mean value: 0.5677138780825602
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 0.80937951 0.6642433 0.97435897 0.74617462
|
|
1. 0.9258201 1. 0.97467943]
|
|
|
|
mean value: 0.9094655927553618
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.77777778 0.88888889 0.66666667 0.77777778 0.88888889
|
|
0.75 0.625 0.75 0.875 ]
|
|
|
|
mean value: 0.7666666666666666
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 0.8961039 0.80519481 0.98701299 0.85714286
|
|
1. 0.96153846 1. 0.98717949]
|
|
|
|
mean value: 0.9494172494172495
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.8 0.88888889 0.76923077 0.75 0.90909091
|
|
0.8 0.66666667 0.75 0.85714286]
|
|
|
|
mean value: 0.7762448662448662
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 0.90697674 0.83516484 0.98701299 0.87356322
|
|
1. 0.96296296 1. 0.98701299]
|
|
|
|
mean value: 0.9552693734730623
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.8 0.625 1. 0.83333333
|
|
0.66666667 0.6 0.75 1. ]
|
|
|
|
mean value: 0.7608333333333334
|
|
|
|
key: train_precision
|
|
value: [1. 1. 0.82978723 0.71698113 0.97435897 0.7755102
|
|
1. 0.92857143 1. 1. ]
|
|
|
|
mean value: 0.922520897313006
|
|
|
|
key: test_recall
|
|
value: [0.5 1. 1. 1. 0.6 1. 1. 0.75 0.75 0.75]
|
|
|
|
mean value: 0.835
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
1. 1. 1. 0.97435897]
|
|
|
|
mean value: 0.9974358974358974
|
|
|
|
key: test_roc_auc
|
|
value: [0.65 0.8 0.9 0.625 0.8 0.875 0.75 0.625 0.75 0.875]
|
|
|
|
mean value: 0.765
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 0.89473684 0.80769231 0.98717949 0.85897436
|
|
1. 0.96153846 1. 0.98717949]
|
|
|
|
mean value: 0.9497300944669366
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.66666667 0.8 0.625 0.6 0.83333333
|
|
0.66666667 0.5 0.6 0.75 ]
|
|
|
|
mean value: 0.6441666666666667
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 0.82978723 0.71698113 0.97435897 0.7755102
|
|
1. 0.92857143 1. 0.97435897]
|
|
|
|
mean value: 0.9199567947489035
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.44
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01249099 0.0119276 0.01191974 0.01216722 0.01207805 0.01217175
|
|
0.01221275 0.01212144 0.0122242 0.01232362]
|
|
|
|
mean value: 0.012163734436035157
|
|
|
|
key: score_time
|
|
value: [0.0113256 0.01128769 0.01134348 0.01128006 0.01128006 0.01129794
|
|
0.01123834 0.01133847 0.01128244 0.0112834 ]
|
|
|
|
mean value: 0.011295747756958009
|
|
|
|
key: test_mcc
|
|
value: [0.31622777 0.63245553 0.8 0.39528471 0.63245553 0.5976143
|
|
0.57735027 0.5 0.5 0.77459667]
|
|
|
|
mean value: 0.5725984780703544
|
|
|
|
key: train_mcc
|
|
value: [1. 0.94804318 0.74445317 0.68442809 0.82485566 0.6642433
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.8866023410978029
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.77777778 0.88888889 0.66666667 0.77777778 0.77777778
|
|
0.75 0.75 0.75 0.875 ]
|
|
|
|
mean value: 0.7680555555555555
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.97402597 0.85714286 0.81818182 0.90909091 0.80519481
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9363636363636364
|
|
|
|
key: test_fscore
|
|
value: [0.57142857 0.8 0.88888889 0.76923077 0.75 0.83333333
|
|
0.8 0.75 0.75 0.85714286]
|
|
|
|
mean value: 0.777002442002442
|
|
|
|
key: train_fscore
|
|
value: [1. 0.97435897 0.87640449 0.84444444 0.90140845 0.83516484
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9431781199054502
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.66666667 0.8 0.625 1. 0.71428571
|
|
0.66666667 0.75 0.75 1. ]
|
|
|
|
mean value: 0.7639285714285714
|
|
|
|
key: train_precision
|
|
value: [1. 0.97435897 0.78 0.73076923 0.96969697 0.71698113
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9171806306900646
|
|
|
|
key: test_recall
|
|
value: [0.5 1. 1. 1. 0.6 1. 1. 0.75 0.75 0.75]
|
|
|
|
mean value: 0.835
|
|
|
|
key: train_recall
|
|
value: [1. 0.97435897 1. 1. 0.84210526 1.
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9816464237516869
|
|
|
|
key: test_roc_auc
|
|
value: [0.65 0.8 0.9 0.625 0.8 0.75 0.75 0.75 0.75 0.875]
|
|
|
|
mean value: 0.765
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.97402159 0.85526316 0.82051282 0.90823212 0.80769231
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9365721997300944
|
|
|
|
key: test_jcc
|
|
value: [0.4 0.66666667 0.8 0.625 0.6 0.71428571
|
|
0.66666667 0.6 0.6 0.75 ]
|
|
|
|
mean value: 0.6422619047619047
|
|
|
|
key: train_jcc
|
|
value: [1. 0.95 0.78 0.73076923 0.82051282 0.71698113
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.8998263183357523
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.09537435 0.08456874 0.08424854 0.08335257 0.08354545 0.08079076
|
|
0.08060503 0.08644676 0.08553648 0.08298945]
|
|
|
|
mean value: 0.08474581241607666
|
|
|
|
key: score_time
|
|
value: [0.01557899 0.01554394 0.01590896 0.01558757 0.01592422 0.01518011
|
|
0.01472735 0.01587462 0.01568294 0.01505804]
|
|
|
|
mean value: 0.015506672859191894
|
|
|
|
key: test_mcc
|
|
value: [0.55 0.8 0.8 1. 1. 1.
|
|
1. 1. 0.57735027 1. ]
|
|
|
|
mean value: 0.8727350269189627
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.88888889 0.88888889 1. 1. 1.
|
|
1. 1. 0.75 1. ]
|
|
|
|
mean value: 0.9305555555555556
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.88888889 0.88888889 1. 1. 1.
|
|
1. 1. 0.66666667 1. ]
|
|
|
|
mean value: 0.9194444444444445
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.8 0.8 1. 1. 1. 1. 1. 1. 1. ]
|
|
|
|
mean value: 0.935
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 1. 1. 1. 1. 1. 1. 0.5 1. ]
|
|
|
|
mean value: 0.925
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.775 0.9 0.9 1. 1. 1. 1. 1. 0.75 1. ]
|
|
|
|
mean value: 0.9325
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.8 0.8 1. 1. 1. 1. 1. 0.5 1. ]
|
|
|
|
mean value: 0.87
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.55
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0307467 0.02915907 0.04344821 0.03888488 0.04137826 0.04140258
|
|
0.03178215 0.05092216 0.04483032 0.04534054]
|
|
|
|
mean value: 0.03978948593139649
|
|
|
|
key: score_time
|
|
value: [0.02144599 0.02702022 0.02193284 0.03621364 0.02020788 0.02563
|
|
0.02068305 0.0353334 0.03633976 0.02274179]
|
|
|
|
mean value: 0.02675485610961914
|
|
|
|
key: test_mcc
|
|
value: [0.55 0.8 0.55 1. 0.8 1.
|
|
1. 1. 0.57735027 1. ]
|
|
|
|
mean value: 0.8277350269189626
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.88888889 0.77777778 1. 0.88888889 1.
|
|
1. 1. 0.75 1. ]
|
|
|
|
mean value: 0.9083333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.88888889 0.75 1. 0.88888889 1.
|
|
1. 1. 0.66666667 1. ]
|
|
|
|
mean value: 0.8944444444444445
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.8 0.75 1. 1. 1. 1. 1. 1. 1. ]
|
|
|
|
mean value: 0.93
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 0.75 1. 0.8 1. 1. 1. 0.5 1. ]
|
|
|
|
mean value: 0.88
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.775 0.9 0.775 1. 0.9 1. 1. 1. 0.75 1. ]
|
|
|
|
mean value: 0.91
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.8 0.6 1. 0.8 1. 1. 1. 0.5 1. ]
|
|
|
|
mean value: 0.83
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01333833 0.01516891 0.0148015 0.01531124 0.01535654 0.01530838
|
|
0.01538754 0.01552606 0.01552677 0.01573038]
|
|
|
|
mean value: 0.015145564079284668
|
|
|
|
key: score_time
|
|
value: [0.01117516 0.01111007 0.01163745 0.01162291 0.01162505 0.01161265
|
|
0.01158023 0.01157904 0.01167965 0.01166892]
|
|
|
|
mean value: 0.011529111862182617
|
|
|
|
key: test_mcc
|
|
value: [ 0.05976143 0.35 0.8 0.31622777 0.47809144 0.5976143
|
|
-0.25819889 0.25819889 0.25819889 0.25819889]
|
|
|
|
mean value: 0.3118092724378834
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.55555556 0.66666667 0.88888889 0.66666667 0.66666667 0.77777778
|
|
0.375 0.625 0.625 0.625 ]
|
|
|
|
mean value: 0.6472222222222223
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.33333333 0.66666667 0.88888889 0.72727273 0.57142857 0.83333333
|
|
0.44444444 0.66666667 0.57142857 0.57142857]
|
|
|
|
mean value: 0.6274891774891774
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.5 0.6 0.8 0.66666667 1. 0.71428571
|
|
0.4 0.6 0.66666667 0.66666667]
|
|
|
|
mean value: 0.6614285714285715
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.25 0.75 1. 0.8 0.4 1. 0.5 0.75 0.5 0.5 ]
|
|
|
|
mean value: 0.645
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.525 0.675 0.9 0.65 0.7 0.75 0.375 0.625 0.625 0.625]
|
|
|
|
mean value: 0.645
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.2 0.5 0.8 0.57142857 0.4 0.71428571
|
|
0.28571429 0.5 0.4 0.4 ]
|
|
|
|
mean value: 0.47714285714285715
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.08
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.15129781 0.11171222 0.17469287 0.15850496 0.1549716 0.13646603
|
|
0.15618372 0.1372323 0.12951589 0.15986323]
|
|
|
|
mean value: 0.14704406261444092
|
|
|
|
key: score_time
|
|
value: [0.00914025 0.00896955 0.00894713 0.00897336 0.00900054 0.00885558
|
|
0.00901031 0.00896049 0.00905704 0.00884628]
|
|
|
|
mean value: 0.008976054191589356
|
|
|
|
key: test_mcc
|
|
value: [0.55 0.8 1. 1. 0.8 1.
|
|
0.57735027 0.77459667 0.57735027 1. ]
|
|
|
|
mean value: 0.8079297207620735
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.77777778 0.88888889 1. 1. 0.88888889 1.
|
|
0.75 0.875 0.75 1. ]
|
|
|
|
mean value: 0.8930555555555555
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.75 0.88888889 1. 1. 0.88888889 1.
|
|
0.8 0.85714286 0.66666667 1. ]
|
|
|
|
mean value: 0.8851587301587301
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.75 0.8 1. 1. 1. 1.
|
|
0.66666667 1. 1. 1. ]
|
|
|
|
mean value: 0.9216666666666666
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 1. 1. 0.8 1. 1. 0.75 0.5 1. ]
|
|
|
|
mean value: 0.88
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.775 0.9 1. 1. 0.9 1. 0.75 0.875 0.75 1. ]
|
|
|
|
mean value: 0.895
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.6 0.8 1. 1. 0.8 1.
|
|
0.66666667 0.75 0.5 1. ]
|
|
|
|
mean value: 0.8116666666666666
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01548123 0.01329041 0.0134387 0.01483011 0.01330495 0.0194087
|
|
0.01376367 0.02508187 0.01355028 0.01389837]
|
|
|
|
mean value: 0.015604829788208008
|
|
|
|
key: score_time
|
|
value: [0.01172805 0.01162148 0.01177287 0.01175046 0.01169348 0.01178837
|
|
0.01441002 0.01195979 0.01457405 0.01157618]
|
|
|
|
mean value: 0.012287473678588868
|
|
|
|
key: test_mcc
|
|
value: [-0.1 -0.1 0.05976143 -0.05976143 0.1 0.15811388
|
|
0.77459667 0.5 -0.25819889 0. ]
|
|
|
|
mean value: 0.10745116625027412
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.44444444 0.44444444 0.55555556 0.44444444 0.55555556 0.55555556
|
|
0.875 0.75 0.375 0.5 ]
|
|
|
|
mean value: 0.55
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.44444444 0.44444444 0.33333333 0.28571429 0.6 0.5
|
|
0.85714286 0.75 0.28571429 0.5 ]
|
|
|
|
mean value: 0.5000793650793651
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.4 0.4 0.5 0.5 0.6 0.66666667
|
|
1. 0.75 0.33333333 0.5 ]
|
|
|
|
mean value: 0.565
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.5 0.5 0.25 0.2 0.6 0.4 0.75 0.75 0.25 0.5 ]
|
|
|
|
mean value: 0.47
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.45 0.45 0.525 0.475 0.55 0.575 0.875 0.75 0.375 0.5 ]
|
|
|
|
mean value: 0.5525
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.28571429 0.28571429 0.2 0.16666667 0.42857143 0.33333333
|
|
0.75 0.6 0.16666667 0.33333333]
|
|
|
|
mean value: 0.355
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.03
|
|
|
|
Accuracy on Blind test: 0.51
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02107096 0.01260209 0.03267598 0.05434585 0.02943563 0.03695607
|
|
0.03151107 0.03148913 0.03227115 0.03163171]
|
|
|
|
mean value: 0.031398963928222653
|
|
|
|
key: score_time
|
|
value: [0.01179409 0.01174188 0.02450228 0.02844524 0.02454138 0.02180123
|
|
0.02072978 0.02000141 0.02119827 0.0115056 ]
|
|
|
|
mean value: 0.01962611675262451
|
|
|
|
key: test_mcc
|
|
value: [0.35 0.47809144 0.8 0.79056942 0.55 1.
|
|
0.57735027 0.5 0.5 0.77459667]
|
|
|
|
mean value: 0.6320607797206962
|
|
|
|
key: train_mcc
|
|
value: [1. 0.97435897 0.97435897 0.97434188 0.97434188 0.97434188
|
|
0.97467943 0.97467943 1. 0.97467943]
|
|
|
|
mean value: 0.9795781901615055
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.66666667 0.88888889 0.88888889 0.77777778 1.
|
|
0.75 0.75 0.75 0.875 ]
|
|
|
|
mean value: 0.8013888888888889
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98701299 0.98701299 0.98701299 0.98701299 0.98701299
|
|
0.98717949 0.98717949 1. 0.98717949]
|
|
|
|
mean value: 0.9896603396603396
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.72727273 0.88888889 0.90909091 0.8 1.
|
|
0.8 0.75 0.75 0.85714286]
|
|
|
|
mean value: 0.8149062049062049
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98701299 0.98701299 0.98666667 0.98666667 0.98666667
|
|
0.98701299 0.98701299 1. 0.98701299]
|
|
|
|
mean value: 0.9895064935064934
|
|
|
|
key: test_precision
|
|
value: [0.6 0.57142857 0.8 0.83333333 0.8 1.
|
|
0.66666667 0.75 0.75 1. ]
|
|
|
|
mean value: 0.7771428571428571
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 1. 1. 0.8 1. 1. 0.75 0.75 0.75]
|
|
|
|
mean value: 0.88
|
|
|
|
key: train_recall
|
|
value: [1. 0.97435897 0.97435897 0.97368421 0.97368421 0.97368421
|
|
0.97435897 0.97435897 1. 0.97435897]
|
|
|
|
mean value: 0.9792847503373819
|
|
|
|
key: test_roc_auc
|
|
value: [0.675 0.7 0.9 0.875 0.775 1. 0.75 0.75 0.75 0.875]
|
|
|
|
mean value: 0.805
|
|
|
|
key: train_roc_auc
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./gid_orig.py:175: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_orig.py:178: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rus_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[1. 0.98717949 0.98717949 0.98684211 0.98684211 0.98684211
|
|
0.98717949 0.98717949 1. 0.98717949]
|
|
|
|
mean value: 0.989642375168691
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.57142857 0.8 0.83333333 0.66666667 1.
|
|
0.66666667 0.6 0.6 0.75 ]
|
|
|
|
mean value: 0.6988095238095238
|
|
|
|
key: train_jcc
|
|
value: [1. 0.97435897 0.97435897 0.97368421 0.97368421 0.97368421
|
|
0.97435897 0.97435897 1. 0.97435897]
|
|
|
|
mean value: 0.9792847503373819
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.08567047 0.17216563 0.17648339 0.17911506 0.1741271 0.1765976
|
|
0.17673564 0.22768736 0.23544955 0.17722917]
|
|
|
|
mean value: 0.17812609672546387
|
|
|
|
key: score_time
|
|
value: [0.0117352 0.01921391 0.02300906 0.02202415 0.02308655 0.02050304
|
|
0.02041197 0.02305675 0.0117054 0.02008557]
|
|
|
|
mean value: 0.019483160972595216
|
|
|
|
key: test_mcc
|
|
value: [0.35 0.47809144 0.8 0.79056942 0.55 1.
|
|
0.25819889 0.5 0. 0.77459667]
|
|
|
|
mean value: 0.5501456417764496
|
|
|
|
key: train_mcc
|
|
value: [1. 0.97435897 0.97435897 0.97434188 0.97434188 0.97434188
|
|
0.8720816 0.97467943 0.79591906 0.97467943]
|
|
|
|
mean value: 0.948910312364934
|
|
|
|
key: test_accuracy
|
|
value: [0.66666667 0.66666667 0.88888889 0.88888889 0.77777778 1.
|
|
0.625 0.75 0.5 0.875 ]
|
|
|
|
mean value: 0.7638888888888888
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.98701299 0.98701299 0.98701299 0.98701299 0.98701299
|
|
0.93589744 0.98717949 0.8974359 0.98717949]
|
|
|
|
mean value: 0.9742757242757243
|
|
|
|
key: test_fscore
|
|
value: [0.66666667 0.72727273 0.88888889 0.90909091 0.8 1.
|
|
0.66666667 0.75 0.6 0.85714286]
|
|
|
|
mean value: 0.7865728715728716
|
|
|
|
key: train_fscore
|
|
value: [1. 0.98701299 0.98701299 0.98666667 0.98666667 0.98666667
|
|
0.93506494 0.98701299 0.89473684 0.98701299]
|
|
|
|
mean value: 0.9737853725222145
|
|
|
|
key: test_precision
|
|
value: [0.6 0.57142857 0.8 0.83333333 0.8 1.
|
|
0.6 0.75 0.5 1. ]
|
|
|
|
mean value: 0.7454761904761905
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1.
|
|
0.94736842 1. 0.91891892 1. ]
|
|
|
|
mean value: 0.986628733997155
|
|
|
|
key: test_recall
|
|
value: [0.75 1. 1. 1. 0.8 1. 0.75 0.75 0.75 0.75]
|
|
|
|
mean value: 0.855
|
|
|
|
key: train_recall
|
|
value: [1. 0.97435897 0.97435897 0.97368421 0.97368421 0.97368421
|
|
0.92307692 0.97435897 0.87179487 0.97435897]
|
|
|
|
mean value: 0.9613360323886639
|
|
|
|
key: test_roc_auc
|
|
value: [0.675 0.7 0.9 0.875 0.775 1. 0.625 0.75 0.5 0.875]
|
|
|
|
mean value: 0.7675
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.98717949 0.98717949 0.98684211 0.98684211 0.98684211
|
|
0.93589744 0.98717949 0.8974359 0.98717949]
|
|
|
|
mean value: 0.9742577597840756
|
|
|
|
key: test_jcc
|
|
value: [0.5 0.57142857 0.8 0.83333333 0.66666667 1.
|
|
0.5 0.6 0.42857143 0.75 ]
|
|
|
|
mean value: 0.665
|
|
|
|
key: train_jcc
|
|
value: [1. 0.97435897 0.97435897 0.97368421 0.97368421 0.97368421
|
|
0.87804878 0.97435897 0.80952381 0.97435897]
|
|
|
|
mean value: 0.9506061119026459
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Logistic Regression
|
|
Model func: LogisticRegression(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegression(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.02788711 0.02603817 0.02707958 0.02680755 0.02836752 0.02531695
|
|
0.0276475 0.02511096 0.02504849 0.02622795]
|
|
|
|
mean value: 0.02655317783355713
|
|
|
|
key: score_time
|
|
value: [0.01307678 0.01143813 0.01144481 0.0114646 0.01140261 0.01142645
|
|
0.01154256 0.01154447 0.01145744 0.01143503]
|
|
|
|
mean value: 0.011623287200927734
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.5 0.73214286 0.76376262 0.76376262 0.6000992
|
|
0.66143783 0.75592895 0.33928571 0.73214286]
|
|
|
|
mean value: 0.6478503420505668
|
|
|
|
key: train_mcc
|
|
value: [0.92737353 0.94117647 0.89869927 0.90025835 0.92791659 0.92791659
|
|
0.88466669 0.89863497 0.89863497 0.91392776]
|
|
|
|
mean value: 0.9119205184892752
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.75 0.86666667 0.86666667 0.86666667 0.8
|
|
0.8 0.86666667 0.66666667 0.86666667]
|
|
|
|
mean value: 0.81625
|
|
|
|
key: train_accuracy
|
|
value: [0.96323529 0.97058824 0.94890511 0.94890511 0.96350365 0.96350365
|
|
0.94160584 0.94890511 0.94890511 0.95620438]
|
|
|
|
mean value: 0.9554261485616145
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.75 0.85714286 0.875 0.875 0.76923077
|
|
0.76923077 0.88888889 0.66666667 0.875 ]
|
|
|
|
mean value: 0.8126159951159951
|
|
|
|
key: train_fscore
|
|
value: [0.96240602 0.97058824 0.94814815 0.94736842 0.96296296 0.96296296
|
|
0.93939394 0.94736842 0.94736842 0.95454545]
|
|
|
|
mean value: 0.9543112981503075
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.75 0.85714286 0.77777778 0.77777778 0.83333333
|
|
1. 0.8 0.71428571 0.875 ]
|
|
|
|
mean value: 0.8242460317460317
|
|
|
|
key: train_precision
|
|
value: [0.98461538 0.97058824 0.96969697 0.984375 0.98484848 0.98484848
|
|
0.96875 0.96923077 0.96923077 0.984375 ]
|
|
|
|
mean value: 0.977055909776498
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.85714286 1. 1. 0.71428571
|
|
0.625 1. 0.625 0.875 ]
|
|
|
|
mean value: 0.8196428571428571
|
|
|
|
key: train_recall
|
|
value: [0.94117647 0.97058824 0.92753623 0.91304348 0.94202899 0.94202899
|
|
0.91176471 0.92647059 0.92647059 0.92647059]
|
|
|
|
mean value: 0.9327578857630009
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.75 0.86607143 0.875 0.875 0.79464286
|
|
0.8125 0.85714286 0.66964286 0.86607143]
|
|
|
|
mean value: 0.8178571428571428
|
|
|
|
key: train_roc_auc
|
|
value: [0.96323529 0.97058824 0.94906223 0.9491688 0.96366155 0.96366155
|
|
0.9413896 0.94874254 0.94874254 0.95598892]
|
|
|
|
mean value: 0.9554241261722081
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.6 0.75 0.77777778 0.77777778 0.625
|
|
0.625 0.8 0.5 0.77777778]
|
|
|
|
mean value: 0.69
|
|
|
|
key: train_jcc
|
|
value: [0.92753623 0.94285714 0.90140845 0.9 0.92857143 0.92857143
|
|
0.88571429 0.9 0.9 0.91304348]
|
|
|
|
mean value: 0.9127702446563438
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.66
|
|
|
|
Model_name: Logistic RegressionCV
|
|
Model func: LogisticRegressionCV(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:444: ConvergenceWarning: lbfgs failed to converge (status=1):
|
|
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
|
|
|
|
Increase the number of iterations (max_iter) or scale the data as shown in:
|
|
https://scikit-learn.org/stable/modules/preprocessing.html
|
|
Please also refer to the documentation for alternative solver options:
|
|
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
|
|
n_iter_i = _check_optimize_result(
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LogisticRegressionCV(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.72274685 0.82610178 0.69772458 0.78831792 0.76682043 0.69155788
|
|
0.73042655 0.76333189 0.84150457 0.74985647]
|
|
|
|
mean value: 0.7578388929367066
|
|
|
|
key: score_time
|
|
value: [0.0148797 0.01483393 0.0148766 0.01480103 0.01492453 0.01183009
|
|
0.02841091 0.01590896 0.01522398 0.01588035]
|
|
|
|
mean value: 0.016157007217407225
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.75 0.73214286 0.875 0.66143783 0.73214286
|
|
0.87287156 1. 0.49099025 0.87287156]
|
|
|
|
mean value: 0.7617397705319495
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 0.98550418 1. 1.
|
|
1. 0.98550725 1. 1. ]
|
|
|
|
mean value: 0.9971011425665239
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.875 0.86666667 0.93333333 0.8 0.86666667
|
|
0.93333333 1. 0.73333333 0.93333333]
|
|
|
|
mean value: 0.8754166666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 0.99270073 1. 1.
|
|
1. 0.99270073 1. 1. ]
|
|
|
|
mean value: 0.9985401459854014
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.875 0.85714286 0.93333333 0.82352941 0.85714286
|
|
0.94117647 1. 0.71428571 0.94117647]
|
|
|
|
mean value: 0.8766316526610645
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 0.99280576 1. 1.
|
|
1. 0.99270073 1. 1. ]
|
|
|
|
mean value: 0.998550648532269
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.875 0.85714286 0.875 0.7 0.85714286
|
|
0.88888889 1. 0.83333333 0.88888889]
|
|
|
|
mean value: 0.8553174603174603
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 0.98571429 1. 1.
|
|
1. 0.98550725 1. 1. ]
|
|
|
|
mean value: 0.9971221532091097
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 0.85714286 1. 1. 0.85714286
|
|
1. 1. 0.625 1. ]
|
|
|
|
mean value: 0.9089285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.875 0.86607143 0.9375 0.8125 0.86607143
|
|
0.92857143 1. 0.74107143 0.92857143]
|
|
|
|
mean value: 0.8767857142857143
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 0.99264706 1. 1.
|
|
1. 0.99275362 1. 1. ]
|
|
|
|
mean value: 0.9985400682011936
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.77777778 0.75 0.875 0.7 0.75
|
|
0.88888889 1. 0.55555556 0.88888889]
|
|
|
|
mean value: 0.7886111111111112
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 0.98571429 1. 1.
|
|
1. 0.98550725 1. 1. ]
|
|
|
|
mean value: 0.9971221532091097
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Gaussian NB
|
|
Model func: GaussianNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianNB())])
|
|
|
|
key: fit_time
|
|
value: [0.01252723 0.01011229 0.00880003 0.00886822 0.00845242 0.00850248
|
|
0.00847101 0.00859404 0.00865054 0.00848961]
|
|
|
|
mean value: 0.009146785736083985
|
|
|
|
key: score_time
|
|
value: [0.0118022 0.00891519 0.00875592 0.00878024 0.00850177 0.00916266
|
|
0.0085156 0.00849533 0.0085237 0.00861311]
|
|
|
|
mean value: 0.009006571769714356
|
|
|
|
key: test_mcc
|
|
value: [ 0.48038446 -0.25819889 -0.20044593 0.07142857 0.66143783 0.26189246
|
|
0.20044593 0.04029115 -0.07142857 0.47245559]
|
|
|
|
mean value: 0.16582626022052996
|
|
|
|
key: train_mcc
|
|
value: [0.4031367 0.44611344 0.40900022 0.49281415 0.38922674 0.4221034
|
|
0.39979063 0.36184606 0.44729758 0.41272853]
|
|
|
|
mean value: 0.4184057451895615
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.4375 0.4 0.53333333 0.8 0.6
|
|
0.6 0.53333333 0.46666667 0.73333333]
|
|
|
|
mean value: 0.5791666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.66911765 0.69852941 0.68613139 0.74452555 0.67153285 0.69343066
|
|
0.67883212 0.66423358 0.72262774 0.68613139]
|
|
|
|
mean value: 0.6915092314297981
|
|
|
|
key: test_fscore
|
|
value: [0.76190476 0.60869565 0.52631579 0.53333333 0.82352941 0.66666667
|
|
0.7 0.63157895 0.5 0.77777778]
|
|
|
|
mean value: 0.6529802340463263
|
|
|
|
key: train_fscore
|
|
value: [0.73988439 0.75449102 0.74251497 0.76190476 0.73684211 0.74698795
|
|
0.73493976 0.7195122 0.73239437 0.73939394]
|
|
|
|
mean value: 0.7408865459811903
|
|
|
|
key: test_precision
|
|
value: [0.61538462 0.46666667 0.41666667 0.5 0.7 0.54545455
|
|
0.58333333 0.54545455 0.5 0.7 ]
|
|
|
|
mean value: 0.5572960372960373
|
|
|
|
key: train_precision
|
|
value: [0.60952381 0.63636364 0.63265306 0.71794872 0.61764706 0.63917526
|
|
0.62244898 0.61458333 0.7027027 0.62886598]
|
|
|
|
mean value: 0.6421912536625458
|
|
|
|
key: test_recall
|
|
value: [1. 0.875 0.71428571 0.57142857 1. 0.85714286
|
|
0.875 0.75 0.5 0.875 ]
|
|
|
|
mean value: 0.8017857142857143
|
|
|
|
key: train_recall
|
|
value: [0.94117647 0.92647059 0.89855072 0.8115942 0.91304348 0.89855072
|
|
0.89705882 0.86764706 0.76470588 0.89705882]
|
|
|
|
mean value: 0.8815856777493606
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.4375 0.41964286 0.53571429 0.8125 0.61607143
|
|
0.58035714 0.51785714 0.46428571 0.72321429]
|
|
|
|
mean value: 0.5794642857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.66911765 0.69852941 0.68456948 0.7440324 0.66975703 0.69192242
|
|
0.68041347 0.66570759 0.72293265 0.68765985]
|
|
|
|
mean value: 0.6914641943734016
|
|
|
|
key: test_jcc
|
|
value: [0.61538462 0.4375 0.35714286 0.36363636 0.7 0.5
|
|
0.53846154 0.46153846 0.33333333 0.63636364]
|
|
|
|
mean value: 0.4943360805860806
|
|
|
|
key: train_jcc
|
|
value: [0.58715596 0.60576923 0.59047619 0.61538462 0.58333333 0.59615385
|
|
0.58095238 0.56190476 0.57777778 0.58653846]
|
|
|
|
mean value: 0.588544656159335
|
|
|
|
MCC on Blind test: -0.0
|
|
|
|
Accuracy on Blind test: 0.32
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.00886059 0.00875497 0.0086782 0.00869155 0.00862312 0.00867844
|
|
0.00865197 0.00868607 0.00863123 0.00867724]
|
|
|
|
mean value: 0.008693337440490723
|
|
|
|
key: score_time
|
|
value: [0.00846362 0.00860286 0.00848532 0.00850081 0.0084486 0.00854588
|
|
0.00849843 0.00848842 0.00855756 0.00858855]
|
|
|
|
mean value: 0.008518004417419433
|
|
|
|
key: test_mcc
|
|
value: [ 0.13483997 0.25 0.33928571 0.37796447 0.33928571 0.6000992
|
|
-0.19642857 0.21821789 0.21821789 -0.07142857]
|
|
|
|
mean value: 0.22100537098371253
|
|
|
|
key: train_mcc
|
|
value: [0.60352881 0.61791438 0.48913043 0.46134236 0.54743126 0.53185944
|
|
0.54864511 0.62041773 0.56235346 0.50525024]
|
|
|
|
mean value: 0.5487873223363263
|
|
|
|
key: test_accuracy
|
|
value: [0.5625 0.625 0.66666667 0.66666667 0.66666667 0.8
|
|
0.4 0.6 0.6 0.46666667]
|
|
|
|
mean value: 0.6054166666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.80147059 0.80882353 0.74452555 0.72992701 0.77372263 0.75912409
|
|
0.77372263 0.81021898 0.7810219 0.75182482]
|
|
|
|
mean value: 0.7734381708887935
|
|
|
|
key: test_fscore
|
|
value: [0.63157895 0.625 0.66666667 0.70588235 0.66666667 0.76923077
|
|
0.4 0.57142857 0.57142857 0.5 ]
|
|
|
|
mean value: 0.6107882545730843
|
|
|
|
key: train_fscore
|
|
value: [0.79699248 0.80597015 0.74452555 0.72180451 0.77697842 0.73170732
|
|
0.76335878 0.80882353 0.7826087 0.73846154]
|
|
|
|
mean value: 0.7671230965670979
|
|
|
|
key: test_precision
|
|
value: [0.54545455 0.625 0.625 0.6 0.625 0.83333333
|
|
0.42857143 0.66666667 0.66666667 0.5 ]
|
|
|
|
mean value: 0.611569264069264
|
|
|
|
key: train_precision
|
|
value: [0.81538462 0.81818182 0.75 0.75 0.77142857 0.83333333
|
|
0.79365079 0.80882353 0.77142857 0.77419355]
|
|
|
|
mean value: 0.7886424781206565
|
|
|
|
key: test_recall
|
|
value: [0.75 0.625 0.71428571 0.85714286 0.71428571 0.71428571
|
|
0.375 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.625
|
|
|
|
key: train_recall
|
|
value: [0.77941176 0.79411765 0.73913043 0.69565217 0.7826087 0.65217391
|
|
0.73529412 0.80882353 0.79411765 0.70588235]
|
|
|
|
mean value: 0.7487212276214834
|
|
|
|
key: test_roc_auc
|
|
value: [0.5625 0.625 0.66964286 0.67857143 0.66964286 0.79464286
|
|
0.40178571 0.60714286 0.60714286 0.46428571]
|
|
|
|
mean value: 0.6080357142857142
|
|
|
|
key: train_roc_auc
|
|
value: [0.80147059 0.80882353 0.74456522 0.73017903 0.77365729 0.75991049
|
|
0.77344416 0.81020887 0.78111679 0.7514919 ]
|
|
|
|
mean value: 0.7734867860187553
|
|
|
|
key: test_jcc
|
|
value: [0.46153846 0.45454545 0.5 0.54545455 0.5 0.625
|
|
0.25 0.4 0.4 0.33333333]
|
|
|
|
mean value: 0.4469871794871795
|
|
|
|
key: train_jcc
|
|
value: [0.6625 0.675 0.59302326 0.56470588 0.63529412 0.57692308
|
|
0.61728395 0.67901235 0.64285714 0.58536585]
|
|
|
|
mean value: 0.6231965625549006
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: K-Nearest Neighbors
|
|
Model func: KNeighborsClassifier()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', KNeighborsClassifier())])
|
|
|
|
key: fit_time
|
|
value: [0.00869894 0.00854826 0.00917315 0.00936198 0.00942206 0.00934601
|
|
0.0093236 0.00952411 0.00928068 0.00939059]
|
|
|
|
mean value: 0.009206938743591308
|
|
|
|
key: score_time
|
|
value: [0.00957131 0.01183295 0.01011848 0.01026583 0.01015377 0.01021051
|
|
0.01025248 0.0101738 0.01047158 0.0102458 ]
|
|
|
|
mean value: 0.010329651832580566
|
|
|
|
key: test_mcc
|
|
value: [ 0.37796447 0.13483997 0.6000992 0.6000992 0.60714286 0.33928571
|
|
-0.21821789 0.87287156 -0.05455447 0.19642857]
|
|
|
|
mean value: 0.34559591828059555
|
|
|
|
key: train_mcc
|
|
value: [0.57359144 0.64705882 0.51856637 0.53294957 0.50427283 0.5182264
|
|
0.678815 0.57663612 0.62163943 0.53314859]
|
|
|
|
mean value: 0.5704904582687371
|
|
|
|
key: test_accuracy
|
|
value: [0.6875 0.5625 0.8 0.8 0.8 0.66666667
|
|
0.4 0.93333333 0.46666667 0.6 ]
|
|
|
|
mean value: 0.6716666666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.78676471 0.82352941 0.75912409 0.76642336 0.75182482 0.75912409
|
|
0.83941606 0.78832117 0.81021898 0.76642336]
|
|
|
|
mean value: 0.7851170030055817
|
|
|
|
key: test_fscore
|
|
value: [0.70588235 0.63157895 0.76923077 0.76923077 0.8 0.66666667
|
|
0.47058824 0.94117647 0.42857143 0.625 ]
|
|
|
|
mean value: 0.6807925639891584
|
|
|
|
key: train_fscore
|
|
value: [0.78518519 0.82352941 0.76595745 0.77142857 0.76056338 0.76258993
|
|
0.83823529 0.78518519 0.81428571 0.76811594]
|
|
|
|
mean value: 0.787507605914375
|
|
|
|
key: test_precision
|
|
value: [0.66666667 0.54545455 0.83333333 0.83333333 0.75 0.625
|
|
0.44444444 0.88888889 0.5 0.625 ]
|
|
|
|
mean value: 0.6712121212121211
|
|
|
|
key: train_precision
|
|
value: [0.79104478 0.82352941 0.75 0.76056338 0.73972603 0.75714286
|
|
0.83823529 0.79104478 0.79166667 0.75714286]
|
|
|
|
mean value: 0.780009604675249
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.71428571 0.71428571 0.85714286 0.71428571
|
|
0.5 1. 0.375 0.625 ]
|
|
|
|
mean value: 0.7
|
|
|
|
key: train_recall
|
|
value: [0.77941176 0.82352941 0.7826087 0.7826087 0.7826087 0.76811594
|
|
0.83823529 0.77941176 0.83823529 0.77941176]
|
|
|
|
mean value: 0.7954177323103154
|
|
|
|
key: test_roc_auc
|
|
value: [0.6875 0.5625 0.79464286 0.79464286 0.80357143 0.66964286
|
|
0.39285714 0.92857143 0.47321429 0.59821429]
|
|
|
|
mean value: 0.6705357142857142
|
|
|
|
key: train_roc_auc
|
|
value: [0.78676471 0.82352941 0.75895141 0.76630435 0.75159847 0.75905797
|
|
0.8394075 0.78825661 0.81042199 0.76651748]
|
|
|
|
mean value: 0.785080988917306
|
|
|
|
key: test_jcc
|
|
value: [0.54545455 0.46153846 0.625 0.625 0.66666667 0.5
|
|
0.30769231 0.88888889 0.27272727 0.45454545]
|
|
|
|
mean value: 0.5347513597513598
|
|
|
|
key: train_jcc
|
|
value: [0.64634146 0.7 0.62068966 0.62790698 0.61363636 0.61627907
|
|
0.72151899 0.64634146 0.68674699 0.62352941]
|
|
|
|
mean value: 0.6502990379207959
|
|
|
|
MCC on Blind test: -0.02
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: SVM
|
|
Model func: SVC(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SVC(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01127934 0.01113343 0.01129603 0.01077962 0.01127839 0.01124501
|
|
0.01120043 0.01065063 0.01107168 0.01019716]
|
|
|
|
mean value: 0.011013174057006836
|
|
|
|
key: score_time
|
|
value: [0.00994158 0.00969481 0.00966859 0.00969338 0.0096314 0.01019287
|
|
0.00968385 0.00993729 0.0096488 0.00895643]
|
|
|
|
mean value: 0.009704899787902833
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.25819889 0.73214286 0.60714286 0.60714286 0.6000992
|
|
0.56407607 0.60714286 0.21821789 0.46428571]
|
|
|
|
mean value: 0.5288389984155754
|
|
|
|
key: train_mcc
|
|
value: [0.86774089 0.85294118 0.83951407 0.81247516 0.84173622 0.88320546
|
|
0.78496269 0.79855228 0.8251228 0.78764184]
|
|
|
|
mean value: 0.8293892570231083
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.625 0.86666667 0.8 0.8 0.8
|
|
0.73333333 0.8 0.6 0.73333333]
|
|
|
|
mean value: 0.7570833333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.93382353 0.92647059 0.91970803 0.90510949 0.91970803 0.94160584
|
|
0.89051095 0.89781022 0.91240876 0.89051095]
|
|
|
|
mean value: 0.9137666380420781
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.66666667 0.85714286 0.8 0.8 0.76923077
|
|
0.66666667 0.8 0.57142857 0.75 ]
|
|
|
|
mean value: 0.7481135531135531
|
|
|
|
key: train_fscore
|
|
value: [0.93430657 0.92647059 0.91970803 0.90225564 0.91729323 0.94202899
|
|
0.88372093 0.89230769 0.91044776 0.88188976]
|
|
|
|
mean value: 0.9110429191976945
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.6 0.85714286 0.75 0.75 0.83333333
|
|
1. 0.85714286 0.66666667 0.75 ]
|
|
|
|
mean value: 0.7921428571428571
|
|
|
|
key: train_precision
|
|
value: [0.92753623 0.92647059 0.92647059 0.9375 0.953125 0.94202899
|
|
0.93442623 0.93548387 0.92424242 0.94915254]
|
|
|
|
mean value: 0.9356436460953137
|
|
|
|
key: test_recall
|
|
value: [0.75 0.75 0.85714286 0.85714286 0.85714286 0.71428571
|
|
0.5 0.75 0.5 0.75 ]
|
|
|
|
mean value: 0.7285714285714285
|
|
|
|
key: train_recall
|
|
value: [0.94117647 0.92647059 0.91304348 0.86956522 0.88405797 0.94202899
|
|
0.83823529 0.85294118 0.89705882 0.82352941]
|
|
|
|
mean value: 0.8888107416879796
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.625 0.86607143 0.80357143 0.80357143 0.79464286
|
|
0.75 0.80357143 0.60714286 0.73214286]
|
|
|
|
mean value: 0.7598214285714285
|
|
|
|
key: train_roc_auc
|
|
value: [0.93382353 0.92647059 0.91975703 0.90537084 0.91997016 0.94160273
|
|
0.89013214 0.89748508 0.91229753 0.89002558]
|
|
|
|
mean value: 0.9136935208866155
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.5 0.75 0.66666667 0.66666667 0.625
|
|
0.5 0.66666667 0.4 0.6 ]
|
|
|
|
mean value: 0.6041666666666666
|
|
|
|
key: train_jcc
|
|
value: [0.87671233 0.8630137 0.85135135 0.82191781 0.84722222 0.89041096
|
|
0.79166667 0.80555556 0.83561644 0.78873239]
|
|
|
|
mean value: 0.8372199423038705
|
|
|
|
MCC on Blind test: 0.06
|
|
|
|
Accuracy on Blind test: 0.69
|
|
|
|
Model_name: MLP
|
|
Model func: MLPClassifier(max_iter=500, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MLPClassifier(max_iter=500, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.52222729 0.468153 0.55469823 0.67369676 0.54588056 0.5346756
|
|
0.54372573 0.65563345 0.51112771 0.51687884]
|
|
|
|
mean value: 0.5526697158813476
|
|
|
|
key: score_time
|
|
value: [0.01204991 0.01208353 0.01204729 0.01215768 0.01216936 0.01213455
|
|
0.01216435 0.01220655 0.01210856 0.01213312]
|
|
|
|
mean value: 0.012125492095947266
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.62994079 0.875 0.66143783 0.60714286 0.73214286
|
|
0.66143783 0.87287156 0.33928571 0.73214286]
|
|
|
|
mean value: 0.6741343078887975
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.8125 0.93333333 0.8 0.8 0.86666667
|
|
0.8 0.93333333 0.66666667 0.86666667]
|
|
|
|
mean value: 0.8291666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.82352941 0.93333333 0.82352941 0.8 0.85714286
|
|
0.76923077 0.94117647 0.66666667 0.875 ]
|
|
|
|
mean value: 0.8313138332255979
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.77777778 0.875 0.7 0.75 0.85714286
|
|
1. 0.88888889 0.71428571 0.875 ]
|
|
|
|
mean value: 0.8215873015873015
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 1. 0.85714286 0.85714286
|
|
0.625 1. 0.625 0.875 ]
|
|
|
|
mean value: 0.8589285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.8125 0.9375 0.8125 0.80357143 0.86607143
|
|
0.8125 0.92857143 0.66964286 0.86607143]
|
|
|
|
mean value: 0.8321428571428572
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.7 0.875 0.7 0.66666667 0.75
|
|
0.625 0.88888889 0.5 0.77777778]
|
|
|
|
mean value: 0.7183333333333333
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.07
|
|
|
|
Accuracy on Blind test: 0.7
|
|
|
|
Model_name: Decision Tree
|
|
Model func: DecisionTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', DecisionTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01453376 0.01425242 0.01198053 0.01168633 0.01525187 0.01180673
|
|
0.01120734 0.0110929 0.01055717 0.01045299]
|
|
|
|
mean value: 0.012282204627990723
|
|
|
|
key: score_time
|
|
value: [0.01155376 0.00964212 0.00991273 0.00939679 0.01865244 0.00950599
|
|
0.00897455 0.00899673 0.00857711 0.00861549]
|
|
|
|
mean value: 0.010382771492004395
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.75 0.875 1. 0.76376262 0.73214286
|
|
0.87287156 1. 0.60714286 1. ]
|
|
|
|
mean value: 0.8482836994743854
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.875 0.93333333 1. 0.86666667 0.86666667
|
|
0.93333333 1. 0.8 1. ]
|
|
|
|
mean value: 0.92125
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.94117647 0.875 0.93333333 1. 0.875 0.85714286
|
|
0.94117647 1. 0.8 1. ]
|
|
|
|
mean value: 0.9222829131652661
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.875 0.875 1. 0.77777778 0.85714286
|
|
0.88888889 1. 0.85714286 1. ]
|
|
|
|
mean value: 0.901984126984127
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.875 1. 1. 1. 0.85714286
|
|
1. 1. 0.75 1. ]
|
|
|
|
mean value: 0.9482142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.875 0.9375 1. 0.875 0.86607143
|
|
0.92857143 1. 0.80357143 1. ]
|
|
|
|
mean value: 0.9223214285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.88888889 0.77777778 0.875 1. 0.77777778 0.75
|
|
0.88888889 1. 0.66666667 1. ]
|
|
|
|
mean value: 0.8625
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: Extra Trees
|
|
Model func: ExtraTreesClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreesClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.08960724 0.08749914 0.08709145 0.0881474 0.08605218 0.08645868
|
|
0.09108973 0.08840013 0.08683872 0.0871675 ]
|
|
|
|
mean value: 0.08783521652221679
|
|
|
|
key: score_time
|
|
value: [0.01729894 0.01750088 0.01744103 0.01703835 0.01712751 0.01697516
|
|
0.01730871 0.01716542 0.01720405 0.01721263]
|
|
|
|
mean value: 0.01722726821899414
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.75 1. 0.73214286 0.875 0.60714286
|
|
0.66143783 1. 0.49099025 0.87287156]
|
|
|
|
mean value: 0.7619526144375526
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.875 1. 0.86666667 0.93333333 0.8
|
|
0.8 1. 0.73333333 0.93333333]
|
|
|
|
mean value: 0.8754166666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.875 1. 0.85714286 0.93333333 0.8
|
|
0.76923077 1. 0.71428571 0.94117647]
|
|
|
|
mean value: 0.8713698556345615
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.875 1. 0.85714286 0.875 0.75
|
|
1. 1. 0.83333333 0.88888889]
|
|
|
|
mean value: 0.8857142857142857
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 0.85714286 1. 0.85714286
|
|
0.625 1. 0.625 1. ]
|
|
|
|
mean value: 0.8714285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.875 1. 0.86607143 0.9375 0.80357143
|
|
0.8125 1. 0.74107143 0.92857143]
|
|
|
|
mean value: 0.8776785714285714
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.77777778 1. 0.75 0.875 0.66666667
|
|
0.625 1. 0.55555556 0.88888889]
|
|
|
|
mean value: 0.7838888888888889
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: -0.04
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Extra Tree
|
|
Model func: ExtraTreeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', ExtraTreeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.00924444 0.00962591 0.01027131 0.00915217 0.00907969 0.0097084
|
|
0.00920677 0.00894308 0.00980639 0.0091753 ]
|
|
|
|
mean value: 0.009421348571777344
|
|
|
|
key: score_time
|
|
value: [0.00945544 0.010427 0.00939941 0.00925112 0.00885606 0.00896883
|
|
0.00881791 0.00923467 0.00883985 0.00884008]
|
|
|
|
mean value: 0.009209036827087402
|
|
|
|
key: test_mcc
|
|
value: [0.51639778 0.51639778 0.76376262 0.60714286 0.56407607 0.73214286
|
|
0.60714286 0.75592895 0.33928571 0.87287156]
|
|
|
|
mean value: 0.6275149042309094
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.75 0.75 0.86666667 0.8 0.73333333 0.86666667
|
|
0.8 0.86666667 0.66666667 0.93333333]
|
|
|
|
mean value: 0.8033333333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.77777778 0.77777778 0.875 0.8 0.77777778 0.85714286
|
|
0.8 0.88888889 0.66666667 0.94117647]
|
|
|
|
mean value: 0.8162208216619982
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.7 0.7 0.77777778 0.75 0.63636364 0.85714286
|
|
0.85714286 0.8 0.71428571 0.88888889]
|
|
|
|
mean value: 0.7681601731601732
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 0.85714286 1. 0.85714286
|
|
0.75 1. 0.625 1. ]
|
|
|
|
mean value: 0.8839285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.75 0.75 0.875 0.80357143 0.75 0.86607143
|
|
0.80357143 0.85714286 0.66964286 0.92857143]
|
|
|
|
mean value: 0.8053571428571429
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.63636364 0.63636364 0.77777778 0.66666667 0.63636364 0.75
|
|
0.66666667 0.8 0.5 0.88888889]
|
|
|
|
mean value: 0.6959090909090909
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: Random Forest
|
|
Model func: RandomForestClassifier(n_estimators=1000, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(n_estimators=1000, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [1.12276006 1.12847686 1.12388277 1.12958765 1.19054484 1.1342032
|
|
1.2050643 1.20557404 1.12965131 1.12810969]
|
|
|
|
mean value: 1.1497854709625244
|
|
|
|
key: score_time
|
|
value: [0.08781695 0.08817816 0.09504771 0.09640932 0.08869791 0.09375215
|
|
0.09752321 0.0974381 0.09431314 0.08888292]
|
|
|
|
mean value: 0.09280595779418946
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.8819171 1. 0.60714286 0.875 0.73214286
|
|
0.60714286 1. 0.49099025 0.75592895]
|
|
|
|
mean value: 0.7832181977854402
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.9375 1. 0.8 0.93333333 0.86666667
|
|
0.8 1. 0.73333333 0.86666667]
|
|
|
|
mean value: 0.8875
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.93333333 1. 0.8 0.93333333 0.85714286
|
|
0.8 1. 0.71428571 0.88888889]
|
|
|
|
mean value: 0.8860317460317461
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 0.75 0.875 0.85714286
|
|
0.85714286 1. 0.83333333 0.8 ]
|
|
|
|
mean value: 0.8972619047619048
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 0.85714286 1. 0.85714286
|
|
0.75 1. 0.625 1. ]
|
|
|
|
mean value: 0.8839285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.9375 1. 0.80357143 0.9375 0.86607143
|
|
0.80357143 1. 0.74107143 0.85714286]
|
|
|
|
mean value: 0.8883928571428572
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/ensemble/_forest.py:427: FutureWarning: `max_features='auto'` has been deprecated in 1.1 and will be removed in 1.3. To keep the past behaviour, explicitly set `max_features='sqrt'` or remove this parameter as it is also the default value for RandomForestClassifiers and ExtraTreesClassifiers.
|
|
warn(
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.875 1. 0.66666667 0.875 0.75
|
|
0.66666667 1. 0.55555556 0.8 ]
|
|
|
|
mean value: 0.8063888888888889
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.8
|
|
|
|
Model_name: Random Forest2
|
|
Model func: RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...05', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10,
|
|
oob_score=True, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.89038706 0.92174602 0.82130027 0.89599752 0.8950417 0.82764983
|
|
0.90394044 0.864923 0.87637615 0.87166667]
|
|
|
|
mean value: 0.8769028663635254
|
|
|
|
key: score_time
|
|
value: [0.1252284 0.20888615 0.21349883 0.23671651 0.32386017 0.22654843
|
|
0.23682284 0.18615389 0.17947078 0.20715261]
|
|
|
|
mean value: 0.2144338607788086
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.8819171 0.875 0.73214286 0.875 0.6000992
|
|
0.60714286 0.76376262 0.49099025 0.6000992 ]
|
|
|
|
mean value: 0.7308071186817022
|
|
|
|
key: train_mcc
|
|
value: [0.98540068 1. 0.98550725 0.98550725 0.98550725 0.97122151
|
|
0.98550418 0.97120941 0.98550418 0.97120941]
|
|
|
|
mean value: 0.9826571107637088
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.9375 0.93333333 0.86666667 0.93333333 0.8
|
|
0.8 0.86666667 0.73333333 0.8 ]
|
|
|
|
mean value: 0.8608333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.99264706 1. 0.99270073 0.99270073 0.99270073 0.98540146
|
|
0.99270073 0.98540146 0.99270073 0.98540146]
|
|
|
|
mean value: 0.991235508802061
|
|
|
|
key: test_fscore
|
|
value: [0.93333333 0.93333333 0.93333333 0.85714286 0.93333333 0.76923077
|
|
0.8 0.85714286 0.71428571 0.82352941]
|
|
|
|
mean value: 0.8554664942900236
|
|
|
|
key: train_fscore
|
|
value: [0.99259259 1. 0.99270073 0.99270073 0.99270073 0.98529412
|
|
0.99259259 0.98507463 0.99259259 0.98507463]
|
|
|
|
mean value: 0.9911323338937202
|
|
|
|
key: test_precision
|
|
value: [1. 1. 0.875 0.85714286 0.875 0.83333333
|
|
0.85714286 1. 0.83333333 0.77777778]
|
|
|
|
mean value: 0.8908730158730158
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 0.85714286 1. 0.71428571
|
|
0.75 0.75 0.625 0.875 ]
|
|
|
|
mean value: 0.8321428571428572
|
|
|
|
key: train_recall
|
|
value: [0.98529412 1. 0.98550725 0.98550725 0.98550725 0.97101449
|
|
0.98529412 0.97058824 0.98529412 0.97058824]
|
|
|
|
mean value: 0.982459505541347
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.9375 0.9375 0.86607143 0.9375 0.79464286
|
|
0.80357143 0.875 0.74107143 0.79464286]
|
|
|
|
mean value: 0.8625
|
|
|
|
key: train_roc_auc
|
|
value: [0.99264706 1. 0.99275362 0.99275362 0.99275362 0.98550725
|
|
0.99264706 0.98529412 0.99264706 0.98529412]
|
|
|
|
mean value: 0.9912297527706735
|
|
|
|
key: test_jcc
|
|
value: [0.875 0.875 0.875 0.75 0.875 0.625
|
|
0.66666667 0.75 0.55555556 0.7 ]
|
|
|
|
mean value: 0.7547222222222222
|
|
|
|
key: train_jcc
|
|
value: [0.98529412 1. 0.98550725 0.98550725 0.98550725 0.97101449
|
|
0.98529412 0.97058824 0.98529412 0.97058824]
|
|
|
|
mean value: 0.982459505541347
|
|
|
|
MCC on Blind test: 0.09
|
|
|
|
Accuracy on Blind test: 0.78
|
|
|
|
Model_name: Naive Bayes
|
|
Model func: BernoulliNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', BernoulliNB())])
|
|
|
|
key: fit_time
|
|
value: [0.02329063 0.00864911 0.00860119 0.00866508 0.00866747 0.00861073
|
|
0.00878024 0.0087285 0.00864959 0.00864077]
|
|
|
|
mean value: 0.010128331184387208
|
|
|
|
key: score_time
|
|
value: [0.01024389 0.00845623 0.00859714 0.0085454 0.008533 0.00846243
|
|
0.00858235 0.00851011 0.00851059 0.00857377]
|
|
|
|
mean value: 0.008701491355895995
|
|
|
|
key: test_mcc
|
|
value: [ 0.13483997 0.25 0.33928571 0.37796447 0.33928571 0.6000992
|
|
-0.19642857 0.21821789 0.21821789 -0.07142857]
|
|
|
|
mean value: 0.22100537098371253
|
|
|
|
key: train_mcc
|
|
value: [0.60352881 0.61791438 0.48913043 0.46134236 0.54743126 0.53185944
|
|
0.54864511 0.62041773 0.56235346 0.50525024]
|
|
|
|
mean value: 0.5487873223363263
|
|
|
|
key: test_accuracy
|
|
value: [0.5625 0.625 0.66666667 0.66666667 0.66666667 0.8
|
|
0.4 0.6 0.6 0.46666667]
|
|
|
|
mean value: 0.6054166666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.80147059 0.80882353 0.74452555 0.72992701 0.77372263 0.75912409
|
|
0.77372263 0.81021898 0.7810219 0.75182482]
|
|
|
|
mean value: 0.7734381708887935
|
|
|
|
key: test_fscore
|
|
value: [0.63157895 0.625 0.66666667 0.70588235 0.66666667 0.76923077
|
|
0.4 0.57142857 0.57142857 0.5 ]
|
|
|
|
mean value: 0.6107882545730843
|
|
|
|
key: train_fscore
|
|
value: [0.79699248 0.80597015 0.74452555 0.72180451 0.77697842 0.73170732
|
|
0.76335878 0.80882353 0.7826087 0.73846154]
|
|
|
|
mean value: 0.7671230965670979
|
|
|
|
key: test_precision
|
|
value: [0.54545455 0.625 0.625 0.6 0.625 0.83333333
|
|
0.42857143 0.66666667 0.66666667 0.5 ]
|
|
|
|
mean value: 0.611569264069264
|
|
|
|
key: train_precision
|
|
value: [0.81538462 0.81818182 0.75 0.75 0.77142857 0.83333333
|
|
0.79365079 0.80882353 0.77142857 0.77419355]
|
|
|
|
mean value: 0.7886424781206565
|
|
|
|
key: test_recall
|
|
value: [0.75 0.625 0.71428571 0.85714286 0.71428571 0.71428571
|
|
0.375 0.5 0.5 0.5 ]
|
|
|
|
mean value: 0.625
|
|
|
|
key: train_recall
|
|
value: [0.77941176 0.79411765 0.73913043 0.69565217 0.7826087 0.65217391
|
|
0.73529412 0.80882353 0.79411765 0.70588235]
|
|
|
|
mean value: 0.7487212276214834
|
|
|
|
key: test_roc_auc
|
|
value: [0.5625 0.625 0.66964286 0.67857143 0.66964286 0.79464286
|
|
0.40178571 0.60714286 0.60714286 0.46428571]
|
|
|
|
mean value: 0.6080357142857142
|
|
|
|
key: train_roc_auc
|
|
value: [0.80147059 0.80882353 0.74456522 0.73017903 0.77365729 0.75991049
|
|
0.77344416 0.81020887 0.78111679 0.7514919 ]
|
|
|
|
mean value: 0.7734867860187553
|
|
|
|
key: test_jcc
|
|
value: [0.46153846 0.45454545 0.5 0.54545455 0.5 0.625
|
|
0.25 0.4 0.4 0.33333333]
|
|
|
|
mean value: 0.4469871794871795
|
|
|
|
key: train_jcc
|
|
value: [0.6625 0.675 0.59302326 0.56470588 0.63529412 0.57692308
|
|
0.61728395 0.67901235 0.64285714 0.58536585]
|
|
|
|
mean value: 0.6231965625549006
|
|
|
|
MCC on Blind test: 0.05
|
|
|
|
Accuracy on Blind test: 0.61
|
|
|
|
Model_name: XGBoost
|
|
Model func: XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
|
|
colsample_bynode=None, colsample_bytree=None,
|
|
enable_categorical=False, gamma=None, gpu_id=None,
|
|
importance_type=None, interaction_constraints=None,
|
|
learning_rate=None, max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan, monotone_constraints=None,
|
|
n_estimators=100, n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None, reg_lambda=None,
|
|
scale_pos_weight=None, subsample=None, tree_method=None,
|
|
use_label_encoder=False, validate_parameters=None, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000...
|
|
interaction_constraints=None, learning_rate=None,
|
|
max_delta_step=None, max_depth=None,
|
|
min_child_weight=None, missing=nan,
|
|
monotone_constraints=None, n_estimators=100,
|
|
n_jobs=None, num_parallel_tree=None,
|
|
predictor=None, random_state=42, reg_alpha=None,
|
|
reg_lambda=None, scale_pos_weight=None,
|
|
subsample=None, tree_method=None,
|
|
use_label_encoder=False,
|
|
validate_parameters=None, verbosity=0))])
|
|
|
|
key: fit_time
|
|
value: [0.081218 0.03844333 0.06259561 0.03818011 0.03826547 0.04282951
|
|
0.04249716 0.04346681 0.0452919 0.04476476]
|
|
|
|
mean value: 0.04775526523590088
|
|
|
|
key: score_time
|
|
value: [0.01017928 0.010149 0.01186872 0.01010132 0.0101397 0.01045871
|
|
0.01019597 0.01062679 0.01143193 0.01149797]
|
|
|
|
mean value: 0.010664939880371094
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 1. 1. 1. 0.76376262 0.875
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.952067971951417
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 1. 1. 1. 0.86666667 0.93333333
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.97375
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.94117647 1. 1. 1. 0.875 0.93333333
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9749509803921569
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88888889 1. 1. 1. 0.77777778 0.875
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9541666666666666
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 1. 1. 1. 0.875 0.9375 1. 1. 1. 1. ]
|
|
|
|
mean value: 0.975
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.88888889 1. 1. 1. 0.77777778 0.875
|
|
1. 1. 1. 1. ]
|
|
|
|
mean value: 0.9541666666666666
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.84
|
|
|
|
Model_name: LDA
|
|
Model func: LinearDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', LinearDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.02603173 0.06835532 0.02540469 0.02657223 0.04874587 0.04817772
|
|
0.04732275 0.02142692 0.04089713 0.04774785]
|
|
|
|
mean value: 0.04006822109222412
|
|
|
|
key: score_time
|
|
value: [0.021065 0.01183319 0.01171088 0.02026939 0.01557493 0.02242637
|
|
0.01170111 0.01167488 0.02084661 0.02236056]
|
|
|
|
mean value: 0.016946291923522948
|
|
|
|
key: test_mcc
|
|
value: [0.67419986 0.62994079 0.49099025 0.49099025 0.66143783 0.26189246
|
|
0.18898224 0.75592895 0.19642857 0.46428571]
|
|
|
|
mean value: 0.481507691618567
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.8125 0.73333333 0.73333333 0.8 0.6
|
|
0.6 0.86666667 0.6 0.73333333]
|
|
|
|
mean value: 0.7291666666666666
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.84210526 0.82352941 0.75 0.75 0.82352941 0.66666667
|
|
0.66666667 0.88888889 0.625 0.75 ]
|
|
|
|
mean value: 0.7586386308909528
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.72727273 0.77777778 0.66666667 0.66666667 0.7 0.54545455
|
|
0.6 0.8 0.625 0.75 ]
|
|
|
|
mean value: 0.6858838383838384
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.875 0.85714286 0.85714286 1. 0.85714286
|
|
0.75 1. 0.625 0.75 ]
|
|
|
|
mean value: 0.8571428571428571
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.8125 0.74107143 0.74107143 0.8125 0.61607143
|
|
0.58928571 0.85714286 0.59821429 0.73214286]
|
|
|
|
mean value: 0.73125
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.72727273 0.7 0.6 0.6 0.7 0.5
|
|
0.5 0.8 0.45454545 0.6 ]
|
|
|
|
mean value: 0.6181818181818182
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.04
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Multinomial
|
|
Model func: MultinomialNB()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', MultinomialNB())])
|
|
|
|
key: fit_time
|
|
value: [0.0242095 0.00912499 0.00889945 0.0089488 0.00893569 0.00852466
|
|
0.0086081 0.00871897 0.00848079 0.00855207]
|
|
|
|
mean value: 0.010300302505493164
|
|
|
|
key: score_time
|
|
value: [0.01155972 0.00883555 0.00887346 0.0088973 0.00890279 0.00845218
|
|
0.00838852 0.00840425 0.00845933 0.0083673 ]
|
|
|
|
mean value: 0.008914041519165038
|
|
|
|
key: test_mcc
|
|
value: [ 0.62994079 0.12598816 0.46428571 0.34247476 0.19642857 0.21821789
|
|
0.49099025 0.33928571 -0.07142857 0.07142857]
|
|
|
|
mean value: 0.28076118489962165
|
|
|
|
key: train_mcc
|
|
value: [0.36800537 0.33827187 0.37278745 0.37278745 0.41939006 0.29950971
|
|
0.34357013 0.31379238 0.34429809 0.40309474]
|
|
|
|
mean value: 0.3575507254340045
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.5625 0.73333333 0.66666667 0.6 0.6
|
|
0.73333333 0.66666667 0.46666667 0.53333333]
|
|
|
|
mean value: 0.6375
|
|
|
|
key: train_accuracy
|
|
value: [0.68382353 0.66911765 0.68613139 0.68613139 0.7080292 0.64963504
|
|
0.67153285 0.65693431 0.67153285 0.70072993]
|
|
|
|
mean value: 0.6783598110777158
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.58823529 0.71428571 0.54545455 0.57142857 0.625
|
|
0.71428571 0.66666667 0.5 0.53333333]
|
|
|
|
mean value: 0.6282219251336898
|
|
|
|
key: train_fscore
|
|
value: [0.67669173 0.66666667 0.68148148 0.68148148 0.69230769 0.64705882
|
|
0.67625899 0.65185185 0.68085106 0.68217054]
|
|
|
|
mean value: 0.6736820325913095
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.55555556 0.71428571 0.75 0.57142857 0.55555556
|
|
0.83333333 0.71428571 0.5 0.57142857]
|
|
|
|
mean value: 0.6543650793650794
|
|
|
|
key: train_precision
|
|
value: [0.69230769 0.67164179 0.6969697 0.6969697 0.73770492 0.65671642
|
|
0.66197183 0.65671642 0.65753425 0.72131148]
|
|
|
|
mean value: 0.6849844184116639
|
|
|
|
key: test_recall
|
|
value: [0.875 0.625 0.71428571 0.42857143 0.57142857 0.71428571
|
|
0.625 0.625 0.5 0.5 ]
|
|
|
|
mean value: 0.6178571428571429
|
|
|
|
key: train_recall
|
|
value: [0.66176471 0.66176471 0.66666667 0.66666667 0.65217391 0.63768116
|
|
0.69117647 0.64705882 0.70588235 0.64705882]
|
|
|
|
mean value: 0.6637894288150042
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.5625 0.73214286 0.65178571 0.59821429 0.60714286
|
|
0.74107143 0.66964286 0.46428571 0.53571429]
|
|
|
|
mean value: 0.6375000000000001
|
|
|
|
key: train_roc_auc
|
|
value: [0.68382353 0.66911765 0.68627451 0.68627451 0.7084399 0.64972293
|
|
0.67167519 0.65686275 0.67178176 0.70034101]
|
|
|
|
mean value: 0.6784313725490195
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.41666667 0.55555556 0.375 0.4 0.45454545
|
|
0.55555556 0.5 0.33333333 0.36363636]
|
|
|
|
mean value: 0.4654292929292929
|
|
|
|
key: train_jcc
|
|
value: [0.51136364 0.5 0.51685393 0.51685393 0.52941176 0.47826087
|
|
0.51086957 0.48351648 0.51612903 0.51764706]
|
|
|
|
mean value: 0.5080906275618744
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.58
|
|
|
|
Model_name: Passive Aggresive
|
|
Model func: PassiveAggressiveClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
PassiveAggressiveClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01075029 0.0149982 0.0139091 0.01420832 0.01531124 0.01442099
|
|
0.01410794 0.01402354 0.0139544 0.01422453]
|
|
|
|
mean value: 0.01399085521697998
|
|
|
|
key: score_time
|
|
value: [0.00854301 0.01132798 0.01137161 0.01131582 0.01130557 0.01135063
|
|
0.01129627 0.01128507 0.01138473 0.01128197]
|
|
|
|
mean value: 0.011046266555786133
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.16012815 0.64465837 0.41931393 0.76376262 0.75592895
|
|
0.56407607 0.64465837 0.21821789 0.87287156]
|
|
|
|
mean value: 0.5673556707125331
|
|
|
|
key: train_mcc
|
|
value: [0.84942274 0.83666003 0.79573573 0.62625207 0.98550418 0.95630861
|
|
0.77016909 0.85060976 0.88920184 0.98550418]
|
|
|
|
mean value: 0.8545368225433598
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.5625 0.8 0.66666667 0.86666667 0.86666667
|
|
0.73333333 0.8 0.6 0.93333333]
|
|
|
|
mean value: 0.7641666666666667
|
|
|
|
key: train_accuracy
|
|
value: [0.91911765 0.91176471 0.89051095 0.7810219 0.99270073 0.97810219
|
|
0.87591241 0.91970803 0.94160584 0.99270073]
|
|
|
|
mean value: 0.9203145126663804
|
|
|
|
key: test_fscore
|
|
value: [0.8 0.66666667 0.72727273 0.44444444 0.875 0.83333333
|
|
0.66666667 0.84210526 0.57142857 0.94117647]
|
|
|
|
mean value: 0.736809414355854
|
|
|
|
key: train_fscore
|
|
value: [0.912 0.91891892 0.88 0.72222222 0.99280576 0.97810219
|
|
0.85950413 0.92517007 0.9375 0.99259259]
|
|
|
|
mean value: 0.9118815879169054
|
|
|
|
key: test_precision
|
|
value: [0.85714286 0.53846154 1. 1. 0.77777778 1.
|
|
1. 0.72727273 0.66666667 0.88888889]
|
|
|
|
mean value: 0.8456210456210456
|
|
|
|
key: train_precision
|
|
value: [1. 0.85 0.98214286 1. 0.98571429 0.98529412
|
|
0.98113208 0.86075949 1. 1. ]
|
|
|
|
mean value: 0.9645042829646786
|
|
|
|
key: test_recall
|
|
value: [0.75 0.875 0.57142857 0.28571429 1. 0.71428571
|
|
0.5 1. 0.5 1. ]
|
|
|
|
mean value: 0.7196428571428571
|
|
|
|
key: train_recall
|
|
value: [0.83823529 1. 0.79710145 0.56521739 1. 0.97101449
|
|
0.76470588 1. 0.88235294 0.98529412]
|
|
|
|
mean value: 0.8803921568627451
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.5625 0.78571429 0.64285714 0.875 0.85714286
|
|
0.75 0.78571429 0.60714286 0.92857143]
|
|
|
|
mean value: 0.7607142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.91911765 0.91176471 0.89119778 0.7826087 0.99264706 0.97815431
|
|
0.87510656 0.92028986 0.94117647 0.99264706]
|
|
|
|
mean value: 0.9204710144927536
|
|
|
|
key: test_jcc
|
|
value: [0.66666667 0.5 0.57142857 0.28571429 0.77777778 0.71428571
|
|
0.5 0.72727273 0.4 0.88888889]
|
|
|
|
mean value: 0.6032034632034632
|
|
|
|
key: train_jcc
|
|
value: [0.83823529 0.85 0.78571429 0.56521739 0.98571429 0.95714286
|
|
0.75362319 0.86075949 0.88235294 0.98529412]
|
|
|
|
mean value: 0.8464053854893636
|
|
|
|
MCC on Blind test: 0.1
|
|
|
|
Accuracy on Blind test: 0.56
|
|
|
|
Model_name: Stochastic GDescent
|
|
Model func: SGDClassifier(n_jobs=10, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', SGDClassifier(n_jobs=10, random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.01437926 0.01255703 0.01282096 0.01237679 0.01261806 0.01318884
|
|
0.01328611 0.01358056 0.01302099 0.01280618]
|
|
|
|
mean value: 0.013063478469848632
|
|
|
|
key: score_time
|
|
value: [0.01148844 0.01132059 0.01194882 0.0113225 0.01146626 0.01135039
|
|
0.01135278 0.01143408 0.0113225 0.01133156]
|
|
|
|
mean value: 0.011433792114257813
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.51639778 0.66143783 0.46428571 0.60714286 0.87287156
|
|
0.875 0.53452248 0.37796447 0.37796447]
|
|
|
|
mean value: 0.5917527957825026
|
|
|
|
key: train_mcc
|
|
value: [1. 0.79405762 0.9001543 0.82106272 0.82629176 0.92709446
|
|
0.97122151 0.60385237 0.87609014 0.66978117]
|
|
|
|
mean value: 0.8389606031963133
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.75 0.8 0.73333333 0.8 0.93333333
|
|
0.93333333 0.73333333 0.66666667 0.66666667]
|
|
|
|
mean value: 0.7829166666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 0.88970588 0.94890511 0.90510949 0.91240876 0.96350365
|
|
0.98540146 0.76642336 0.93430657 0.81021898]
|
|
|
|
mean value: 0.9115983254615715
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.71428571 0.82352941 0.71428571 0.8 0.92307692
|
|
0.93333333 0.8 0.61538462 0.61538462]
|
|
|
|
mean value: 0.7762809739280327
|
|
|
|
key: train_fscore
|
|
value: [1. 0.87804878 0.95104895 0.8976378 0.91044776 0.96402878
|
|
0.98550725 0.80952381 0.92913386 0.76363636]
|
|
|
|
mean value: 0.9089013342789495
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.83333333 0.7 0.71428571 0.75 1.
|
|
1. 0.66666667 0.8 0.8 ]
|
|
|
|
mean value: 0.8042063492063493
|
|
|
|
key: train_precision
|
|
value: [1. 0.98181818 0.91891892 0.98275862 0.93846154 0.95714286
|
|
0.97142857 0.68 1. 1. ]
|
|
|
|
mean value: 0.9430528688459723
|
|
|
|
key: test_recall
|
|
value: [0.875 0.625 1. 0.71428571 0.85714286 0.85714286
|
|
0.875 1. 0.5 0.5 ]
|
|
|
|
mean value: 0.7803571428571429
|
|
|
|
key: train_recall
|
|
value: [1. 0.79411765 0.98550725 0.82608696 0.88405797 0.97101449
|
|
1. 1. 0.86764706 0.61764706]
|
|
|
|
mean value: 0.8946078431372549
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.75 0.8125 0.73214286 0.80357143 0.92857143
|
|
0.9375 0.71428571 0.67857143 0.67857143]
|
|
|
|
mean value: 0.7848214285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 0.88970588 0.94863598 0.90569054 0.91261722 0.96344842
|
|
0.98550725 0.76811594 0.93382353 0.80882353]
|
|
|
|
mean value: 0.9116368286445012
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.55555556 0.7 0.55555556 0.66666667 0.85714286
|
|
0.875 0.66666667 0.44444444 0.44444444]
|
|
|
|
mean value: 0.646547619047619
|
|
|
|
key: train_jcc
|
|
value: [1. 0.7826087 0.90666667 0.81428571 0.83561644 0.93055556
|
|
0.97142857 0.68 0.86764706 0.61764706]
|
|
|
|
mean value: 0.8406455759591905
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.63
|
|
|
|
Model_name: AdaBoost Classifier
|
|
Model func: AdaBoostClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', AdaBoostClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.1090014 0.09479475 0.09645629 0.09737253 0.09540439 0.09643078
|
|
0.09812617 0.09583139 0.09525943 0.09584975]
|
|
|
|
mean value: 0.09745268821716309
|
|
|
|
key: score_time
|
|
value: [0.01493382 0.01502466 0.01518154 0.01484466 0.01519299 0.01544476
|
|
0.01564693 0.01502728 0.01509666 0.01492715]
|
|
|
|
mean value: 0.01513204574584961
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.8819171 1. 0.875 0.76376262 0.73214286
|
|
1. 1. 0.76376262 0.87287156]
|
|
|
|
mean value: 0.8771373857115167
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.9375 1. 0.93333333 0.86666667 0.86666667
|
|
1. 1. 0.86666667 0.93333333]
|
|
|
|
mean value: 0.9341666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.94117647 0.93333333 1. 0.93333333 0.875 0.85714286
|
|
1. 1. 0.85714286 0.94117647]
|
|
|
|
mean value: 0.9338305322128851
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88888889 1. 1. 0.875 0.77777778 0.85714286
|
|
1. 1. 1. 0.88888889]
|
|
|
|
mean value: 0.9287698412698413
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.875 1. 1. 1. 0.85714286
|
|
1. 1. 0.75 1. ]
|
|
|
|
mean value: 0.9482142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.9375 1. 0.9375 0.875 0.86607143
|
|
1. 1. 0.875 0.92857143]
|
|
|
|
mean value: 0.9357142857142857
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.88888889 0.875 1. 0.875 0.77777778 0.75
|
|
1. 1. 0.75 0.88888889]
|
|
|
|
mean value: 0.8805555555555555
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.09
|
|
|
|
Accuracy on Blind test: 0.79
|
|
|
|
Model_name: Bagging Classifier
|
|
Model func: BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model',
|
|
BaggingClassifier(n_jobs=10, oob_score=True,
|
|
random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.03332615 0.03480697 0.04867983 0.03268933 0.03992987 0.04574323
|
|
0.03273129 0.03599477 0.03098392 0.04712749]
|
|
|
|
mean value: 0.03820128440856933
|
|
|
|
key: score_time
|
|
value: [0.01736021 0.0297308 0.04027843 0.02309608 0.03837204 0.02011347
|
|
0.02397585 0.02971005 0.03037763 0.02972603]
|
|
|
|
mean value: 0.028274059295654297
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.8819171 1. 1. 0.60714286 0.73214286
|
|
1. 1. 0.60714286 1. ]
|
|
|
|
mean value: 0.8710262778804965
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 0.98550725 0.98550725 0.98550725
|
|
1. 0.98550418 0.98550418 1. ]
|
|
|
|
mean value: 0.9927530097707291
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.9375 1. 1. 0.8 0.86666667
|
|
1. 1. 0.8 1. ]
|
|
|
|
mean value: 0.9341666666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 0.99270073 0.99270073 0.99270073
|
|
1. 0.99270073 0.99270073 1. ]
|
|
|
|
mean value: 0.9963503649635037
|
|
|
|
key: test_fscore
|
|
value: [0.94117647 0.93333333 1. 1. 0.8 0.85714286
|
|
1. 1. 0.8 1. ]
|
|
|
|
mean value: 0.9331652661064426
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 0.99270073 0.99270073 0.99270073
|
|
1. 0.99259259 0.99259259 1. ]
|
|
|
|
mean value: 0.9963287374966208
|
|
|
|
key: test_precision
|
|
value: [0.88888889 1. 1. 1. 0.75 0.85714286
|
|
1. 1. 0.85714286 1. ]
|
|
|
|
mean value: 0.9353174603174603
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.875 1. 1. 0.85714286 0.85714286
|
|
1. 1. 0.75 1. ]
|
|
|
|
mean value: 0.9339285714285714
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 0.98550725 0.98550725 0.98550725
|
|
1. 0.98529412 0.98529412 1. ]
|
|
|
|
mean value: 0.9927109974424553
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.9375 1. 1. 0.80357143 0.86607143
|
|
1. 1. 0.80357143 1. ]
|
|
|
|
mean value: 0.9348214285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 0.99275362 0.99275362 0.99275362
|
|
1. 0.99264706 0.99264706 1. ]
|
|
|
|
mean value: 0.9963554987212276
|
|
|
|
key: test_jcc
|
|
value: [0.88888889 0.875 1. 1. 0.66666667 0.75
|
|
1. 1. 0.66666667 1. ]
|
|
|
|
mean value: 0.8847222222222222
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 0.98550725 0.98550725 0.98550725
|
|
1. 0.98529412 0.98529412 1. ]
|
|
|
|
mean value: 0.9927109974424553
|
|
|
|
MCC on Blind test: 0.13
|
|
|
|
Accuracy on Blind test: 0.86
|
|
|
|
Model_name: Gaussian Process
|
|
Model func: GaussianProcessClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GaussianProcessClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0346148 0.05789852 0.04053473 0.04679036 0.02014375 0.02017975
|
|
0.02011824 0.0213201 0.04365182 0.05004883]
|
|
|
|
mean value: 0.03553009033203125
|
|
|
|
key: score_time
|
|
value: [0.02155566 0.02402639 0.01512194 0.02006745 0.01230836 0.01237655
|
|
0.01236105 0.02265072 0.02061892 0.0222733 ]
|
|
|
|
mean value: 0.018336033821105956
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.40451992 0.875 0.875 0.76376262 0.46428571
|
|
0.49099025 0.87287156 0.19642857 0.6000992 ]
|
|
|
|
mean value: 0.6172898619490848
|
|
|
|
key: train_mcc
|
|
value: [0.98540068 0.98540068 0.98550418 0.98550418 0.98550418 0.98550418
|
|
0.98550725 0.98550725 1. 0.98550725]
|
|
|
|
mean value: 0.9869339808783808
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.6875 0.93333333 0.93333333 0.86666667 0.73333333
|
|
0.73333333 0.93333333 0.6 0.8 ]
|
|
|
|
mean value: 0.8033333333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.99264706 0.99264706 0.99270073 0.99270073 0.99270073 0.99270073
|
|
0.99270073 0.99270073 1. 0.99270073]
|
|
|
|
mean value: 0.993419922713611
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.73684211 0.93333333 0.93333333 0.875 0.71428571
|
|
0.71428571 0.94117647 0.625 0.82352941]
|
|
|
|
mean value: 0.81203154946189
|
|
|
|
key: train_fscore
|
|
value: [0.99270073 0.99270073 0.99280576 0.99280576 0.99280576 0.99280576
|
|
0.99270073 0.99270073 1. 0.99270073]
|
|
|
|
mean value: 0.9934726671217771
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.63636364 0.875 0.875 0.77777778 0.71428571
|
|
0.83333333 0.88888889 0.625 0.77777778]
|
|
|
|
mean value: 0.7781204906204906
|
|
|
|
key: train_precision
|
|
value: [0.98550725 0.98550725 0.98571429 0.98571429 0.98571429 0.98571429
|
|
0.98550725 0.98550725 1. 0.98550725]
|
|
|
|
mean value: 0.9870393374741201
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 1. 1. 0.71428571
|
|
0.625 1. 0.625 0.875 ]
|
|
|
|
mean value: 0.8589285714285715
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.6875 0.9375 0.9375 0.875 0.73214286
|
|
0.74107143 0.92857143 0.59821429 0.79464286]
|
|
|
|
mean value: 0.8044642857142857
|
|
|
|
key: train_roc_auc
|
|
value: [0.99264706 0.99264706 0.99264706 0.99264706 0.99264706 0.99264706
|
|
0.99275362 0.99275362 1. 0.99275362]
|
|
|
|
mean value: 0.9934143222506394
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.58333333 0.875 0.875 0.77777778 0.55555556
|
|
0.55555556 0.88888889 0.45454545 0.7 ]
|
|
|
|
mean value: 0.6965656565656566
|
|
|
|
key: train_jcc
|
|
value: [0.98550725 0.98550725 0.98571429 0.98571429 0.98571429 0.98571429
|
|
0.98550725 0.98550725 1. 0.98550725]
|
|
|
|
mean value: 0.9870393374741201
|
|
|
|
MCC on Blind test: -0.06
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Gradient Boosting
|
|
Model func: GradientBoostingClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', GradientBoostingClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.2400794 0.23439431 0.23425388 0.2315309 0.26206779 0.24720073
|
|
0.23333836 0.23660707 0.22880101 0.19629478]
|
|
|
|
mean value: 0.23445682525634765
|
|
|
|
key: score_time
|
|
value: [0.00921178 0.00896645 0.00896502 0.00888395 0.00920105 0.00898576
|
|
0.00909066 0.00913382 0.00911546 0.00896382]
|
|
|
|
mean value: 0.009051775932312012
|
|
|
|
key: test_mcc
|
|
value: [0.8819171 0.75 1. 1. 0.875 0.73214286
|
|
1. 1. 0.60714286 0.87287156]
|
|
|
|
mean value: 0.871907437891788
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.9375 0.875 1. 1. 0.93333333 0.86666667
|
|
1. 1. 0.8 0.93333333]
|
|
|
|
mean value: 0.9345833333333333
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.94117647 0.875 1. 1. 0.93333333 0.85714286
|
|
1. 1. 0.8 0.94117647]
|
|
|
|
mean value: 0.9347829131652661
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [0.88888889 0.875 1. 1. 0.875 0.85714286
|
|
1. 1. 0.85714286 0.88888889]
|
|
|
|
mean value: 0.9242063492063491
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [1. 0.875 1. 1. 1. 0.85714286
|
|
1. 1. 0.75 1. ]
|
|
|
|
mean value: 0.9482142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.9375 0.875 1. 1. 0.9375 0.86607143
|
|
1. 1. 0.80357143 0.92857143]
|
|
|
|
mean value: 0.9348214285714286
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.88888889 0.77777778 1. 1. 0.875 0.75
|
|
1. 1. 0.66666667 0.88888889]
|
|
|
|
mean value: 0.8847222222222222
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.11
|
|
|
|
Accuracy on Blind test: 0.83
|
|
|
|
Model_name: QDA
|
|
Model func: QuadraticDiscriminantAnalysis()
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: /home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/discriminant_analysis.py:887: UserWarning: Variables are collinear
|
|
warnings.warn("Variables are collinear")
|
|
/home/tanu/anaconda3/envs/UQ/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1327: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
|
|
_warn_prf(average, modifier, msg_start, len(result))
|
|
Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', QuadraticDiscriminantAnalysis())])
|
|
|
|
key: fit_time
|
|
value: [0.01696777 0.01680088 0.02019072 0.01669979 0.02372169 0.01697087
|
|
0.01702762 0.016783 0.01727152 0.01699018]
|
|
|
|
mean value: 0.017942404747009276
|
|
|
|
key: score_time
|
|
value: [0.01224136 0.01189041 0.01206803 0.01199365 0.01204085 0.01464152
|
|
0.0147686 0.01463866 0.01480031 0.0144105 ]
|
|
|
|
mean value: 0.013349390029907227
|
|
|
|
key: test_mcc
|
|
value: [0.77459667 0.8819171 0.75592895 0.75592895 0.75592895 0.75592895
|
|
0.46770717 1. 0.56407607 0.66143783]
|
|
|
|
mean value: 0.7373450632934155
|
|
|
|
key: train_mcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_accuracy
|
|
value: [0.875 0.9375 0.86666667 0.86666667 0.86666667 0.86666667
|
|
0.66666667 1. 0.73333333 0.8 ]
|
|
|
|
mean value: 0.8479166666666667
|
|
|
|
key: train_accuracy
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_fscore
|
|
value: [0.85714286 0.93333333 0.83333333 0.83333333 0.83333333 0.83333333
|
|
0.54545455 1. 0.66666667 0.76923077]
|
|
|
|
mean value: 0.8105161505161504
|
|
|
|
key: train_fscore
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: train_precision
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_recall
|
|
value: [0.75 0.875 0.71428571 0.71428571 0.71428571 0.71428571
|
|
0.375 1. 0.5 0.625 ]
|
|
|
|
mean value: 0.6982142857142857
|
|
|
|
key: train_recall
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_roc_auc
|
|
value: [0.875 0.9375 0.85714286 0.85714286 0.85714286 0.85714286
|
|
0.6875 1. 0.75 0.8125 ]
|
|
|
|
mean value: 0.8491071428571428
|
|
|
|
key: train_roc_auc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
key: test_jcc
|
|
value: [0.75 0.875 0.71428571 0.71428571 0.71428571 0.71428571
|
|
0.375 1. 0.5 0.625 ]
|
|
|
|
mean value: 0.6982142857142857
|
|
|
|
key: train_jcc
|
|
value: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
|
|
|
|
mean value: 1.0
|
|
|
|
MCC on Blind test: 0.0
|
|
|
|
Accuracy on Blind test: 0.99
|
|
|
|
Model_name: Ridge Classifier
|
|
Model func: RidgeClassifier(random_state=42)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifier(random_state=42))])
|
|
|
|
key: fit_time
|
|
value: [0.0292964 0.04019451 0.03581357 0.03268957 0.03282976 0.0328927
|
|
0.02812839 0.0422287 0.03068423 0.03335238]
|
|
|
|
mean value: 0.03381102085113526
|
|
|
|
key: score_time
|
|
value: [0.02089906 0.02192187 0.02337551 0.02003455 0.02072692 0.0224824
|
|
0.02318764 0.01997781 0.02314067 0.01798964]
|
|
|
|
mean value: 0.021373605728149413
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.75 1. 0.76376262 0.66143783 0.87287156
|
|
1. 1. 0.46428571 0.87287156]
|
|
|
|
mean value: 0.8015170068114487
|
|
|
|
key: train_mcc
|
|
value: [0.97058824 0.97058824 0.97080136 0.97080136 0.97080136 0.97080136
|
|
0.97080136 0.97080136 0.98550418 0.98550418]
|
|
|
|
mean value: 0.9736993013308314
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.875 1. 0.86666667 0.8 0.93333333
|
|
1. 1. 0.73333333 0.93333333]
|
|
|
|
mean value: 0.8954166666666666
|
|
|
|
key: train_accuracy
|
|
value: [0.98529412 0.98529412 0.98540146 0.98540146 0.98540146 0.98540146
|
|
0.98540146 0.98540146 0.99270073 0.99270073]
|
|
|
|
mean value: 0.986839845427222
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.875 1. 0.875 0.82352941 0.92307692
|
|
1. 1. 0.75 0.94117647]
|
|
|
|
mean value: 0.901131221719457
|
|
|
|
key: train_fscore
|
|
value: [0.98529412 0.98529412 0.98550725 0.98550725 0.98550725 0.98550725
|
|
0.98529412 0.98529412 0.99259259 0.99259259]
|
|
|
|
mean value: 0.9868390641280668
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.875 1. 0.77777778 0.7 1.
|
|
1. 1. 0.75 0.88888889]
|
|
|
|
mean value: 0.8769444444444444
|
|
|
|
key: train_precision
|
|
value: [0.98529412 0.98529412 0.98550725 0.98550725 0.98550725 0.98550725
|
|
0.98529412 0.98529412 1. 1. ]
|
|
|
|
mean value: 0.9883205456095482
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 1. 1. 0.85714286
|
|
1. 1. 0.75 1. ]
|
|
|
|
mean value: 0.9357142857142857
|
|
|
|
key: train_recall
|
|
value: /home/tanu/git/LSHTM_analysis/scripts/ml/./gid_orig.py:195: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_CT.sort_values(by = ['test_mcc'], ascending = False, inplace = True)
|
|
/home/tanu/git/LSHTM_analysis/scripts/ml/./gid_orig.py:198: SettingWithCopyWarning:
|
|
A value is trying to be set on a copy of a slice from a DataFrame
|
|
|
|
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
|
|
rouC_BT.sort_values(by = ['bts_mcc'], ascending = False, inplace = True)
|
|
[0.98529412 0.98529412 0.98550725 0.98550725 0.98550725 0.98550725
|
|
0.98529412 0.98529412 0.98529412 0.98529412]
|
|
|
|
mean value: 0.98537936913896
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.875 1. 0.875 0.8125 0.92857143
|
|
1. 1. 0.73214286 0.92857143]
|
|
|
|
mean value: 0.8964285714285715
|
|
|
|
key: train_roc_auc
|
|
value: [0.98529412 0.98529412 0.98540068 0.98540068 0.98540068 0.98540068
|
|
0.98540068 0.98540068 0.99264706 0.99264706]
|
|
|
|
mean value: 0.9868286445012788
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.77777778 1. 0.77777778 0.7 0.85714286
|
|
1. 1. 0.6 0.88888889]
|
|
|
|
mean value: 0.8301587301587301
|
|
|
|
key: train_jcc
|
|
value: [0.97101449 0.97101449 0.97142857 0.97142857 0.97142857 0.97142857
|
|
0.97101449 0.97101449 0.98529412 0.98529412]
|
|
|
|
mean value: 0.9740360492022896
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.67
|
|
|
|
Model_name: Ridge ClassifierCV
|
|
Model func: RidgeClassifierCV(cv=10)
|
|
List of models: [('Logistic Regression', LogisticRegression(random_state=42)), ('Logistic RegressionCV', LogisticRegressionCV(random_state=42)), ('Gaussian NB', GaussianNB()), ('Naive Bayes', BernoulliNB()), ('K-Nearest Neighbors', KNeighborsClassifier()), ('SVM', SVC(random_state=42)), ('MLP', MLPClassifier(max_iter=500, random_state=42)), ('Decision Tree', DecisionTreeClassifier(random_state=42)), ('Extra Trees', ExtraTreesClassifier(random_state=42)), ('Extra Tree', ExtraTreeClassifier(random_state=42)), ('Random Forest', RandomForestClassifier(n_estimators=1000, random_state=42)), ('Random Forest2', RandomForestClassifier(max_features='auto', min_samples_leaf=5,
|
|
n_estimators=1000, n_jobs=10, oob_score=True,
|
|
random_state=42)), ('Naive Bayes', BernoulliNB()), ('XGBoost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
|
|
colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
|
|
gamma=0, gpu_id=-1, importance_type=None,
|
|
interaction_constraints='', learning_rate=0.300000012,
|
|
max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
|
|
monotone_constraints='()', n_estimators=100, n_jobs=12,
|
|
num_parallel_tree=1, predictor='auto', random_state=42,
|
|
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
|
|
tree_method='exact', use_label_encoder=False,
|
|
validate_parameters=1, verbosity=0)), ('LDA', LinearDiscriminantAnalysis()), ('Multinomial', MultinomialNB()), ('Passive Aggresive', PassiveAggressiveClassifier(n_jobs=10, random_state=42)), ('Stochastic GDescent', SGDClassifier(n_jobs=10, random_state=42)), ('AdaBoost Classifier', AdaBoostClassifier(random_state=42)), ('Bagging Classifier', BaggingClassifier(n_jobs=10, oob_score=True, random_state=42)), ('Gaussian Process', GaussianProcessClassifier(random_state=42)), ('Gradient Boosting', GradientBoostingClassifier(random_state=42)), ('QDA', QuadraticDiscriminantAnalysis()), ('Ridge Classifier', RidgeClassifier(random_state=42)), ('Ridge ClassifierCV', RidgeClassifierCV(cv=10))]
|
|
Running model pipeline: Pipeline(steps=[('prep',
|
|
ColumnTransformer(remainder='passthrough',
|
|
transformers=[('num', MinMaxScaler(),
|
|
Index(['ligand_distance', 'ligand_affinity_change', 'duet_stability_change',
|
|
'ddg_foldx', 'deepddg', 'ddg_dynamut2', 'mmcsm_lig', 'contacts',
|
|
'mcsm_na_affinity', 'rsa',
|
|
...
|
|
'VENM980101', 'VOGG950101', 'WEIL970101', 'WEIL970102', 'ZHAC000101',
|
|
'ZHAC000102', 'ZHAC000103', 'ZHAC000104', 'ZHAC000105', 'ZHAC000106'],
|
|
dtype='object', length=167)),
|
|
('cat', OneHotEncoder(),
|
|
Index(['ss_class', 'aa_prop_change', 'electrostatics_change',
|
|
'polarity_change', 'water_change', 'drtype_mode_labels', 'active_site'],
|
|
dtype='object'))])),
|
|
('model', RidgeClassifierCV(cv=10))])
|
|
|
|
key: fit_time
|
|
value: [0.26786852 0.21878886 0.17795444 0.20230484 0.09808445 0.19803524
|
|
0.20462227 0.25141764 0.24799275 0.22460699]
|
|
|
|
mean value: 0.20916759967803955
|
|
|
|
key: score_time
|
|
value: [0.0210011 0.01702762 0.02308965 0.01346898 0.02083993 0.01430988
|
|
0.01592922 0.01662874 0.02179027 0.02321339]
|
|
|
|
mean value: 0.01872987747192383
|
|
|
|
key: test_mcc
|
|
value: [0.62994079 0.75 1. 0.76376262 0.76376262 0.87287156
|
|
1. 1. 0.46428571 0.87287156]
|
|
|
|
mean value: 0.8117494856174312
|
|
|
|
key: train_mcc
|
|
value: [0.97058824 0.97058824 0.97080136 0.97080136 0.98550418 0.97080136
|
|
0.97080136 0.97080136 0.98550418 0.98550418]
|
|
|
|
mean value: 0.975169582857287
|
|
|
|
key: test_accuracy
|
|
value: [0.8125 0.875 1. 0.86666667 0.86666667 0.93333333
|
|
1. 1. 0.73333333 0.93333333]
|
|
|
|
mean value: 0.9020833333333333
|
|
|
|
key: train_accuracy
|
|
value: [0.98529412 0.98529412 0.98540146 0.98540146 0.99270073 0.98540146
|
|
0.98540146 0.98540146 0.99270073 0.99270073]
|
|
|
|
mean value: 0.9875697724345213
|
|
|
|
key: test_fscore
|
|
value: [0.82352941 0.875 1. 0.875 0.875 0.92307692
|
|
1. 1. 0.75 0.94117647]
|
|
|
|
mean value: 0.9062782805429864
|
|
|
|
key: train_fscore
|
|
value: [0.98529412 0.98529412 0.98550725 0.98550725 0.99280576 0.98550725
|
|
0.98529412 0.98529412 0.99259259 0.99259259]
|
|
|
|
mean value: 0.9875689150299539
|
|
|
|
key: test_precision
|
|
value: [0.77777778 0.875 1. 0.77777778 0.77777778 1.
|
|
1. 1. 0.75 0.88888889]
|
|
|
|
mean value: 0.8847222222222222
|
|
|
|
key: train_precision
|
|
value: [0.98529412 0.98529412 0.98550725 0.98550725 0.98571429 0.98550725
|
|
0.98529412 0.98529412 1. 1. ]
|
|
|
|
mean value: 0.9883412495432956
|
|
|
|
key: test_recall
|
|
value: [0.875 0.875 1. 1. 1. 0.85714286
|
|
1. 1. 0.75 1. ]
|
|
|
|
mean value: 0.9357142857142857
|
|
|
|
key: train_recall
|
|
value: [0.98529412 0.98529412 0.98550725 0.98550725 1. 0.98550725
|
|
0.98529412 0.98529412 0.98529412 0.98529412]
|
|
|
|
mean value: 0.9868286445012788
|
|
|
|
key: test_roc_auc
|
|
value: [0.8125 0.875 1. 0.875 0.875 0.92857143
|
|
1. 1. 0.73214286 0.92857143]
|
|
|
|
mean value: 0.9026785714285714
|
|
|
|
key: train_roc_auc
|
|
value: [0.98529412 0.98529412 0.98540068 0.98540068 0.99264706 0.98540068
|
|
0.98540068 0.98540068 0.99264706 0.99264706]
|
|
|
|
mean value: 0.9875532821824382
|
|
|
|
key: test_jcc
|
|
value: [0.7 0.77777778 1. 0.77777778 0.77777778 0.85714286
|
|
1. 1. 0.6 0.88888889]
|
|
|
|
mean value: 0.837936507936508
|
|
|
|
key: train_jcc
|
|
value: [0.97101449 0.97101449 0.97142857 0.97142857 0.98571429 0.97142857
|
|
0.97101449 0.97101449 0.98529412 0.98529412]
|
|
|
|
mean value: 0.975464620630861
|
|
|
|
MCC on Blind test: 0.12
|
|
|
|
Accuracy on Blind test: 0.67
|